diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..190610a7 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,41 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +name: pre-commit + +on: + pull_request: + push: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + - uses: pre-commit/action@v3.0.0 + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..8c97af6d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,74 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +repos: +- repo: https://github.com/timothycrosley/isort + rev: 5.12.0 + hooks: + - id: isort + additional_dependencies: [toml] +- repo: https://github.com/psf/black + rev: 23.1.0 + hooks: + - id: black + types_or: [python, cython] +- repo: https://github.com/PyCQA/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + args: ["--max-line-length=88", "--select=C,E,F,W,B,B950", "--extend-ignore=E203,E501"] + types_or: [python, cython] +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: v16.0.5 + hooks: + - id: clang-format + types_or: [c, c++, cuda, proto, textproto, java] + args: ["-fallback-style=none", "-style=file", "-i"] +- repo: https://github.com/codespell-project/codespell + rev: v2.2.4 + hooks: + - id: codespell + additional_dependencies: [tomli] + args: ["--toml", "pyproject.toml"] + exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$) +# More details about these pre-commit hooks here: +# https://pre-commit.com/hooks.html +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-merge-conflict + - id: check-json + - id: check-toml + - id: check-yaml + - id: check-shebang-scripts-are-executable + - id: end-of-file-fixer + types_or: [c, c++, cuda, proto, textproto, java, python] + - id: mixed-line-ending + - id: requirements-txt-fixer + - id: trailing-whitespace + diff --git a/CMakeLists.txt b/CMakeLists.txt index 970d1da2..16995795 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -179,7 +179,7 @@ else() ############################################################################## # - install targets----------------------------------------------------------- - + add_library( ${BACKEND_TARGET} SHARED src/api.cc @@ -208,12 +208,12 @@ else() INTERFACE_POSITION_INDEPENDENT_CODE ON ) endif() - + target_compile_options(${BACKEND_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RAPIDS_TRITON_BACKEND_CXX_FLAGS}>" "$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDS_TRITON_BACKEND_CUDA_FLAGS}>" ) - + target_include_directories(${BACKEND_TARGET} PRIVATE "$" "${CMAKE_CURRENT_SOURCE_DIR}/src" @@ -246,15 +246,15 @@ else() if(NOT TRITON_FIL_USE_TREELITE_STATIC) list(APPEND BACKEND_TARGET ${TREELITE_LIBS_NO_PREFIX}) endif() - + install( TARGETS ${BACKEND_TARGET} LIBRARY DESTINATION ${BACKEND_FOLDER}/${BACKEND_NAME} ) - + ############################################################################## # - build test executable ---------------------------------------------------- - + # TODO (wphicks) # if(BUILD_TESTS) # include(test/CMakeLists.txt) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6f5498de..3c3a887a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,25 +42,25 @@ To contribute code to this project, please follow these steps: ``` Developer Certificate of Origin Version 1.1 - + Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
1 Letterman Drive Suite D4700 San Francisco, CA, 94129 - + Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. ``` ``` Developer's Certificate of Origin 1.1 - + By making a contribution to this project, I certify that: - + (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or - + (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or - + (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. - + (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. ``` diff --git a/LICENSE b/LICENSE index b360c424..6218f494 100644 --- a/LICENSE +++ b/LICENSE @@ -187,7 +187,7 @@ identification within third-party archives. Copyright 2021 NVIDIA CORPORATION - + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at diff --git a/README.md b/README.md index b9d08d4d..24f985a5 100644 --- a/README.md +++ b/README.md @@ -121,11 +121,11 @@ model_repository/ backend: "fil" max_batch_size: 32768 input [ - { + { name: "input__0" data_type: TYPE_FP32 - dims: [ $NUM_FEATURES ] - } + dims: [ $NUM_FEATURES ] + } ] output [ { diff --git a/docs/explainability.md b/docs/explainability.md index b9977698..f67d24d8 100644 --- a/docs/explainability.md +++ b/docs/explainability.md @@ -70,11 +70,11 @@ output [ backend: "fil" max_batch_size: 32768 input [ - { + { name: "input__0" data_type: TYPE_FP32 - dims: [ $NUM_FEATURES ] - } + dims: [ $NUM_FEATURES ] + } ] output [ { diff --git a/docs/model_config.md b/docs/model_config.md index 6ab0f9c5..7c1a7214 100644 --- a/docs/model_config.md +++ b/docs/model_config.md @@ -52,12 +52,12 @@ A typical `config.pbtxt` file might look something like this: ```protobuf backend: "fil" max_batch_size: 32768 -input [ - { +input [ + { name: "input__0" data_type: TYPE_FP32 - dims: [ 32 ] - } + dims: [ 32 ] + } ] output [ { @@ -129,11 +129,11 @@ Below, we see an example I/O specification for a model with 32 input features and 3 output classes with the `predict_proba` flag enabled: ``` input [ - { + { name: "input__0" data_type: TYPE_FP32 - dims: [ 32 ] - } + dims: [ 32 ] + } ] output [ { diff --git a/notebooks/simple-xgboost/README.md b/notebooks/simple-xgboost/README.md index 76e0a754..e2828d7d 100644 --- a/notebooks/simple-xgboost/README.md +++ b/notebooks/simple-xgboost/README.md @@ -12,7 +12,7 @@ This notebook is a reference for deploying an XGBoost model on Triton with the F * [Docker](https://docs.docker.com/get-docker/) * [The NVIDIA container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) -## Run the Triton Inference Server container +## Run the Triton Inference Server container **Note:** Due to a bug in release 21.07, 
Triton's `model_analyzer` cannot be used with the FIL backend. If you wish to use the model analyzer, please use release 21.08 or later. @@ -28,7 +28,7 @@ docker run \ --rm \ --net=host \ --name triton_fil \ - nvcr.io/nvidia/tritonserver: # Put the appropriate tag here. + nvcr.io/nvidia/tritonserver: # Put the appropriate tag here. ``` **Note:** The artifacts created by scripts inside the container are created with root permission. The user on the host machine might not be able to modify the artifacts once the container exits. To avoid this issue, copy the notebook `docker cp simple_xgboost_example.ipynb ` and create the artifacts inside the container. diff --git a/ops/gpuci_conda_retry b/ops/gpuci_conda_retry old mode 100644 new mode 100755 diff --git a/ops/move_deps.py b/ops/move_deps.py old mode 100644 new mode 100755 index 46234be1..88cf8f89 --- a/ops/move_deps.py +++ b/ops/move_deps.py @@ -2,19 +2,17 @@ import os import re -import subprocess import shutil +import subprocess from pathlib import Path -MISSING_REGEX = re.compile(r'\n\t(.+)\ =>\ not\ found') -FOUND_REGEX = re.compile(r'\n\t(.+)\ =>\ (.+)\ (\(0[xX][0-9a-fA-F]+\))') +MISSING_REGEX = re.compile(r"\n\t(.+)\ =>\ not\ found") +FOUND_REGEX = re.compile(r"\n\t(.+)\ =>\ (.+)\ (\(0[xX][0-9a-fA-F]+\))") def ldd(path): """Get output of ldd for given file""" - ldd_out = subprocess.run( - ['ldd', path], check=True, capture_output=True, text=True - ) + ldd_out = subprocess.run(["ldd", path], check=True, capture_output=True, text=True) return ldd_out.stdout @@ -52,34 +50,34 @@ def move_dependencies(): location and repeats the analysis until it has satisfied as many missing dependencies as possible. """ - fil_lib = os.getenv('FIL_LIB', 'libtriton_fil.so') - lib_dir = os.getenv('LIB_DIR', '/usr/lib') + fil_lib = os.getenv("FIL_LIB", "libtriton_fil.so") + lib_dir = os.getenv("LIB_DIR", "/usr/lib") - conda_lib_dir = os.getenv('CONDA_LIB_DIR') + conda_lib_dir = os.getenv("CONDA_LIB_DIR") if conda_lib_dir is None: - conda_prefix = os.getenv('CONDA_PREFIX') + conda_prefix = os.getenv("CONDA_PREFIX") if conda_prefix is None: raise RuntimeError( - 'Must set CONDA_LIB_DIR to conda environment lib directory' + "Must set CONDA_LIB_DIR to conda environment lib directory" ) - conda_lib_dir = os.path.join(conda_prefix, 'lib') + conda_lib_dir = os.path.join(conda_prefix, "lib") Path(lib_dir).mkdir(parents=True, exist_ok=True) # Set RUNPATH to conda lib directory to determine locations of # conda-provided dependencies - subprocess.run( - ['patchelf', '--set-rpath', conda_lib_dir, fil_lib], check=True - ) + subprocess.run(["patchelf", "--set-rpath", conda_lib_dir, fil_lib], check=True) ldd_out = ldd(fil_lib) expected_missing = set(get_missing_deps(ldd_out)) deps_map = get_deps_map(ldd_out, required_dir=conda_lib_dir) # Set RUNPATH to final dependency directory - subprocess.run(['patchelf', '--set-rpath', lib_dir, fil_lib], check=True) + subprocess.run(["patchelf", "--set-rpath", lib_dir, fil_lib], check=True) - prev_missing = {None, } + prev_missing = { + None, + } cur_missing = set() while prev_missing != cur_missing: prev_missing = cur_missing @@ -93,11 +91,12 @@ def move_dependencies(): remaining = cur_missing - expected_missing - if remaining != {}: + if remaining: - print('Could not find the following dependencies:') + print("Could not find the following dependencies:") for lib in sorted(remaining): print(lib) else: - print('All dependencies found') + print("All dependencies found") + -if __name__ == '__main__': +if __name__ == "__main__": move_dependencies()
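The script above leans on two regular expressions to split `ldd` output into dependencies the dynamic linker resolved and dependencies it could not find. A minimal sketch of how they behave, using an invented `ldd` transcript (the library names, paths, and load addresses below are illustrative, not from the repository):

```python
import re

MISSING_REGEX = re.compile(r"\n\t(.+)\ =>\ not\ found")
FOUND_REGEX = re.compile(r"\n\t(.+)\ =>\ (.+)\ (\(0[xX][0-9a-fA-F]+\))")

# Invented ldd transcript; move_deps.py obtains the real thing via
# subprocess.run(["ldd", path], check=True, capture_output=True, text=True)
sample = (
    "\tlinux-vdso.so.1 (0x00007ffc8a9d3000)\n"
    "\tlibtreelite.so => /opt/conda/lib/libtreelite.so (0x00007f1b2c000000)\n"
    "\tlibfoo.so.2 => not found\n"
)

print(MISSING_REGEX.findall(sample))
# ['libfoo.so.2']
print([(name, path) for name, path, _ in FOUND_REGEX.findall(sample)])
# [('libtreelite.so', '/opt/conda/lib/libtreelite.so')]
```

Each resolved dependency that lives under the conda `lib` directory is copied next to `libtriton_fil.so`, and the analysis repeats until the set of missing libraries stops shrinking, per the docstring above.
diff --git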
a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..1a8da1f4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,49 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +[tool.codespell] +# note: pre-commit passes explicit lists of files here, which this skip file list doesn't override - +# this is only to allow you to run codespell interactively +skip = "./.git,./.github" +# ignore short words, and typename parameters like OffsetT +ignore-regex = "\\b(.{1,4}|[A-Z]\\w*T)\\b" +# use the 'clear' dictionary for unambiguous spelling mistakes +builtin = "clear" +# disable warnings about binary files and wrong encoding +quiet-level = 3 + +[tool.isort] +profile = "black" +use_parentheses = true +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +ensure_newline_before_comments = true +line_length = 88 +balanced_wrapping = true +indent = " " +skip = ["build"] + diff --git a/qa/L0_e2e/conftest.py b/qa/L0_e2e/conftest.py index de38cf4f..beea52fb 100644 --- a/qa/L0_e2e/conftest.py +++ b/qa/L0_e2e/conftest.py @@ -1,16 +1,13 @@ import os + from hypothesis import settings -settings.register_profile('dev', max_examples=10) -settings.register_profile('ci', max_examples=100) +settings.register_profile("dev", max_examples=10) +settings.register_profile("ci", max_examples=100) + def pytest_addoption(parser): default_repo_path = os.path.join( - os.path.dirname(os.path.abspath(__file__)), - 'model_repository' - ) - parser.addoption( - "--repo", - action="store", - default=default_repo_path + os.path.dirname(os.path.abspath(__file__)), "model_repository" ) + parser.addoption("--repo", action="store", default=default_repo_path) diff --git a/qa/L0_e2e/generate_example_model.py b/qa/L0_e2e/generate_example_model.py index ef365fce..4febae7e 100644 --- a/qa/L0_e2e/generate_example_model.py +++ b/qa/L0_e2e/generate_example_model.py @@ -19,6 +19,7 @@ import cuml from cuml.ensemble import RandomForestClassifier as cuRFC from cuml.ensemble import RandomForestRegressor as cuRFR + try: import lightgbm as lgb except 
ImportError: @@ -37,9 +38,9 @@ def generate_classification_data(classes=2, rows=1000, cols=32, cat_cols=0): """Generate classification training set""" if cat_cols > 0: - output_type = 'cudf' + output_type = "cudf" else: - output_type = 'numpy' + output_type = "numpy" with cuml.using_output_type(output_type): data, labels = cuml.datasets.make_classification( @@ -47,17 +48,17 @@ def generate_classification_data(classes=2, rows=1000, cols=32, cat_cols=0): n_features=cols, n_informative=cols // 3, n_classes=classes, - random_state=0 + random_state=0, ) if cat_cols > 0: - selected_cols = data.sample(n=min(cat_cols, cols), axis='columns') - negatives = (selected_cols < 0) - positives = (selected_cols >= 0) - selected_cols = selected_cols.astype('object') - selected_cols[negatives] = 'negative' - selected_cols[positives] = 'positive' - data[selected_cols.columns] = selected_cols.astype('category') + selected_cols = data.sample(n=min(cat_cols, cols), axis="columns") + negatives = selected_cols < 0 + positives = selected_cols >= 0 + selected_cols = selected_cols.astype("object") + selected_cols[negatives] = "negative" + selected_cols[positives] = "positive" + data[selected_cols.columns] = selected_cols.astype("category") data = data.to_pandas() labels = labels.to_pandas() return data, labels @@ -66,15 +67,15 @@ def generate_classification_data(classes=2, rows=1000, cols=32, cat_cols=0): def train_xgboost_classifier(data, labels, depth=25, trees=100): """Train XGBoost classification model""" if xgb is None: - raise RuntimeError('XGBoost could not be imported') + raise RuntimeError("XGBoost could not be imported") training_params = { - 'eval_metric': 'error', - 'objective': 'binary:logistic', - 'tree_method': 'gpu_hist', - 'max_depth': depth, - 'n_estimators': trees, - 'use_label_encoder': False, - 'predictor': 'gpu_predictor' + "eval_metric": "error", + "objective": "binary:logistic", + "tree_method": "gpu_hist", + "max_depth": depth, + "n_estimators": trees, + "use_label_encoder": False, + "predictor": "gpu_predictor", } model = xgb.XGBClassifier(**training_params) @@ -84,23 +85,23 @@ def train_xgboost_classifier(data, labels, depth=25, trees=100): def train_lightgbm_classifier(data, labels, depth=25, trees=100, classes=2): """Train LightGBM classification model""" if lgb is None: - raise RuntimeError('LightGBM could not be imported') + raise RuntimeError("LightGBM could not be imported") lgb_data = lgb.Dataset(data, label=labels) if classes <= 2: classes = 1 - objective = 'binary' - metric = 'binary_logloss' + objective = "binary" + metric = "binary_logloss" else: - objective = 'multiclass' - metric = 'multi_logloss' + objective = "multiclass" + metric = "multi_logloss" training_params = { - 'metric': metric, - 'objective': objective, - 'num_class': classes, - 'max_depth': depth, - 'verbose': -1 + "metric": metric, + "objective": objective, + "num_class": classes, + "max_depth": depth, + "verbose": -1, } model = lgb.train(training_params, lgb_data, trees) @@ -110,26 +111,26 @@ def train_lightgbm_classifier(data, labels, depth=25, trees=100, classes=2): def train_lightgbm_rf_classifier(data, labels, depth=25, trees=100, classes=2): """Train LightGBM classification model""" if lgb is None: - raise RuntimeError('LightGBM could not be imported') + raise RuntimeError("LightGBM could not be imported") lgb_data = lgb.Dataset(data, label=labels) if classes <= 2: classes = 1 - objective = 'binary' - metric = 'binary_logloss' + objective = "binary" + metric = "binary_logloss" else: - objective = 
'multiclass' - metric = 'multi_logloss' + objective = "multiclass" + metric = "multi_logloss" training_params = { - 'bagging_fraction': 0.8, - 'bagging_freq': 1, - 'boosting': 'rf', - 'metric': metric, - 'objective': objective, - 'num_class': classes, - 'max_depth': depth, - 'verbose': -1 + "bagging_fraction": 0.8, + "bagging_freq": 1, + "boosting": "rf", + "metric": metric, + "objective": objective, + "num_class": classes, + "max_depth": depth, + "verbose": -1, } model = lgb.train(training_params, lgb_data, trees) @@ -139,62 +140,46 @@ def train_sklearn_classifier(data, labels, depth=25, trees=100): """Train SKLearn classification model""" if skRFC is None: - raise RuntimeError('SKLearn could not be imported') - model = skRFC( - max_depth=depth, n_estimators=trees, random_state=0 - ) + raise RuntimeError("SKLearn could not be imported") + model = skRFC(max_depth=depth, n_estimators=trees, random_state=0) return model.fit(data, labels) def train_cuml_classifier(data, labels, depth=25, trees=100): - """Train SKLearn classification model""" + """Train cuML classification model""" - model = cuRFC( - max_depth=depth, n_estimators=trees, random_state=0 - ) + model = cuRFC(max_depth=depth, n_estimators=trees, random_state=0) return model.fit(data, labels) def train_classifier( - data, - labels, - model_type='xgboost', - depth=25, - trees=100, - classes=2): + data, labels, model_type="xgboost", depth=25, trees=100, classes=2 +): """Train classification model""" - if model_type == 'xgboost': - return train_xgboost_classifier( - data, labels, depth=depth, trees=trees - ) - if model_type == 'lightgbm': + if model_type == "xgboost": + return train_xgboost_classifier(data, labels, depth=depth, trees=trees) + if model_type == "lightgbm": return train_lightgbm_classifier( data, labels, depth=depth, trees=trees, classes=classes ) - if model_type == 'lightgbm_rf': + if model_type == "lightgbm_rf": return train_lightgbm_rf_classifier( data, labels, depth=depth, trees=trees, classes=classes ) - if model_type == 'cuml': - return train_cuml_classifier( - data, labels, depth=depth, trees=trees - ) - if model_type == 'sklearn': - return train_sklearn_classifier( - data, labels, depth=depth, trees=trees - ) + if model_type == "cuml": + return train_cuml_classifier(data, labels, depth=depth, trees=trees) + if model_type == "sklearn": + return train_sklearn_classifier(data, labels, depth=depth, trees=trees) raise RuntimeError('Unknown model type "{}"'.format(model_type)) def generate_regression_data(rows=1000, cols=32): - with cuml.using_output_type('numpy'): + with cuml.using_output_type("numpy"): data, labels = cuml.datasets.make_regression( - n_samples=rows, - n_features=cols, - n_informative=cols // 3, - random_state=0) + n_samples=rows, n_features=cols, n_informative=cols // 3, random_state=0 + ) return data, labels @@ -202,14 +187,14 @@ def train_xgboost_regressor(data, targets, depth=25, trees=100): """Train XGBoost regression model""" if xgb is None: - raise RuntimeError('XGBoost could not be imported') + raise RuntimeError("XGBoost could not be imported") training_params = { - 'objective': 'reg:squarederror', - 'tree_method': 'gpu_hist', - 'max_depth': depth, - 'n_estimators': trees, - 'predictor': 'gpu_predictor' + "objective": "reg:squarederror", + "tree_method": "gpu_hist", + "max_depth": depth, + "n_estimators": trees, + "predictor": "gpu_predictor", } model = xgb.XGBRegressor(**training_params) @@ -219,14 +204,14 @@ def train_xgboost_regressor(data, targets,
depth=25, trees=100): def train_lightgbm_regressor(data, targets, depth=25, trees=100): """Train LightGBM regression model""" if lgb is None: - raise RuntimeError('LightGBM could not be imported') + raise RuntimeError("LightGBM could not be imported") lgb_data = lgb.Dataset(data, targets) training_params = { - 'metric': 'l2', - 'objective': 'regression', - 'max_depth': depth, - 'verbose': -1 + "metric": "l2", + "objective": "regression", + "max_depth": depth, + "verbose": -1, } model = lgb.train(training_params, lgb_data, trees) @@ -236,15 +221,15 @@ def train_lightgbm_rf_regressor(data, targets, depth=25, trees=100): """Train LightGBM regression model""" if lgb is None: - raise RuntimeError('LightGBM could not be imported') + raise RuntimeError("LightGBM could not be imported") lgb_data = lgb.Dataset(data, targets) training_params = { - 'boosting': 'rf', - 'metric': 'l2', - 'objective': 'regression', - 'max_depth': depth, - 'verbose': -1 + "boosting": "rf", + "metric": "l2", + "objective": "regression", + "max_depth": depth, + "verbose": -1, } model = lgb.train(training_params, lgb_data, trees) @@ -254,129 +239,111 @@ def train_sklearn_regressor(data, targets, depth=25, trees=100): """Train SKLearn regression model""" if skRFR is None: - raise RuntimeError('SKLearn could not be imported') - model = skRFR( - max_depth=depth, n_estimators=trees, random_state=0 - ) + raise RuntimeError("SKLearn could not be imported") + model = skRFR(max_depth=depth, n_estimators=trees, random_state=0) return model.fit(data, targets) def train_cuml_regressor(data, targets, depth=25, trees=100): """Train cuML regression model""" - model = cuRFR( - max_depth=depth, n_estimators=trees, random_state=0 - ) + model = cuRFR(max_depth=depth, n_estimators=trees, random_state=0) return model.fit(data, targets) -def train_regressor( - data, - targets, - model_type='xgboost', - depth=25, - trees=100): +def train_regressor(data, targets, model_type="xgboost", depth=25, trees=100): """Train regression model""" - if model_type == 'xgboost': - return train_xgboost_regressor( - data, targets, depth=depth, trees=trees - ) - if model_type == 'lightgbm': - return train_lightgbm_regressor( - data, targets, depth=depth, trees=trees - ) - if model_type == 'lightgbm_rf': - return train_lightgbm_rf_regressor( - data, labels, depth=depth, trees=trees - ) - if model_type == 'sklearn': - return train_sklearn_regressor( - data, targets, depth=depth, trees=trees - ) - if model_type == 'cuml': - return train_cuml_regressor( - data, targets, depth=depth, trees=trees - ) + if model_type == "xgboost": + return train_xgboost_regressor(data, targets, depth=depth, trees=trees) + if model_type == "lightgbm": + return train_lightgbm_regressor(data, targets, depth=depth, trees=trees) + if model_type == "lightgbm_rf": + return train_lightgbm_rf_regressor(data, targets, depth=depth, trees=trees) + if model_type == "sklearn": + return train_sklearn_regressor(data, targets, depth=depth, trees=trees) + if model_type == "cuml": + return train_cuml_regressor(data, targets, depth=depth, trees=trees) raise RuntimeError('Unknown model type "{}"'.format(model_type)) def generate_model( - task='classification', - model_type='xgboost', - depth=25, - trees=100, - classes=2, - samples=1000, - features=32, - cat_features=0): + task="classification", + model_type="xgboost", + depth=25, + trees=100, + classes=2, + samples=1000, +
features=32, + cat_features=0, +): """Generate a model with the given properties""" - if cat_features != 0 and model_type != 'lightgbm': + if cat_features != 0 and model_type != "lightgbm": raise NotImplementedError( - 'Categorical feature generation has not yet been implemented for' - ' non-LightGBM models' + "Categorical feature generation has not yet been implemented for" + " non-LightGBM models" ) - if task == 'classification': + if task == "classification": data, labels = generate_classification_data( classes=classes, rows=samples, cols=features, cat_cols=cat_features ) return train_classifier( - data, labels, model_type=model_type, depth=depth, trees=trees, - classes=classes - ) - if task == 'regression': - data, labels = generate_regression_data( - rows=samples, cols=features + data, + labels, + model_type=model_type, + depth=depth, + trees=trees, + classes=classes, ) + if task == "regression": + data, labels = generate_regression_data(rows=samples, cols=features) return train_regressor( data, labels, model_type=model_type, depth=depth, trees=trees ) raise RuntimeError('Unknown model task "{}"'.format(task)) -def serialize_model(model, directory, output_format='xgboost'): - if output_format == 'xgboost': - model_path = os.path.join(directory, 'xgboost.model') +def serialize_model(model, directory, output_format="xgboost"): + if output_format == "xgboost": + model_path = os.path.join(directory, "xgboost.model") model.save_model(model_path) return model_path - if output_format == 'xgboost_json': - model_path = os.path.join(directory, 'xgboost.json') + if output_format == "xgboost_json": + model_path = os.path.join(directory, "xgboost.json") model.save_model(model_path) return model_path - if output_format == 'lightgbm': - model_path = os.path.join(directory, 'model.txt') + if output_format == "lightgbm": + model_path = os.path.join(directory, "model.txt") model.save_model(model_path) return model_path - if output_format == 'pickle': - model_path = os.path.join(directory, 'model.pkl') - with open(model_path, 'wb') as model_file: + if output_format == "pickle": + model_path = os.path.join(directory, "model.pkl") + with open(model_path, "wb") as model_file: pickle.dump(model, model_file) return model_path - raise RuntimeError( - f'Unknown serialization format "{output_format}"' - ) + raise RuntimeError(f'Unknown serialization format "{output_format}"') def generate_config( - model_name, - *, - instance_kind='gpu', - model_format='xgboost', - features=32, - num_classes=2, - predict_proba=False, - use_experimental_optimizations=True, - task='classification', - threshold=0.5, - max_batch_size=8192, - storage_type="AUTO"): - """Return a string with the full Triton config.pbtxt for this model - """ - if instance_kind == 'gpu': - instance_kind = 'KIND_GPU' - elif instance_kind == 'cpu': - instance_kind = 'KIND_CPU' + model_name, + *, + instance_kind="gpu", + model_format="xgboost", + features=32, + num_classes=2, + predict_proba=False, + use_experimental_optimizations=True, + task="classification", + threshold=0.5, + max_batch_size=8192, + storage_type="AUTO", +): + """Return a string with the full Triton config.pbtxt for this model""" + if instance_kind == "gpu": + instance_kind = "KIND_GPU" + elif instance_kind == "cpu": + instance_kind = "KIND_CPU" else: raise ValueError("instance_kind must be either 'gpu' or 'cpu'") if predict_proba: @@ -385,10 +352,10 @@ def generate_config( output_dim = 1 predict_proba = str(bool(predict_proba)).lower() use_experimental_optimizations = 
str(bool(use_experimental_optimizations)).lower() - output_class = str(task == 'classification').lower() + output_class = str(task == "classification").lower() - if model_format == 'pickle': - model_format = 'treelite_checkpoint' + if model_format == "pickle": + model_format = "treelite_checkpoint" # Add treeshap output to xgboost_shap model treeshap_output_dim = num_classes if num_classes > 2 else 1 @@ -397,7 +364,7 @@ def generate_config( else: treeshap_output_str = f"{treeshap_output_dim}, {features + 1}" treeshap_output = "" - if model_name == 'xgboost_shap': + if model_name == "xgboost_shap": treeshap_output = f""" ,{{ name: "treeshap_output" @@ -464,56 +431,45 @@ def generate_config( def build_model( - task='classification', - model_type='xgboost', - instance_kind='gpu', - output_format=None, - depth=25, - trees=100, - classes=2, - samples=1000, - features=32, - cat_features=0, - model_repo=os.path.dirname(__file__), - model_name=None, - classification_threshold=0.5, - predict_proba=False, - use_experimental_optimizations=True, - max_batch_size=8192, - storage_type="AUTO"): + task="classification", + model_type="xgboost", + instance_kind="gpu", + output_format=None, + depth=25, + trees=100, + classes=2, + samples=1000, + features=32, + cat_features=0, + model_repo=os.path.dirname(__file__), + model_name=None, + classification_threshold=0.5, + predict_proba=False, + use_experimental_optimizations=True, + max_batch_size=8192, + storage_type="AUTO", +): """Train a model with given parameters, create a config file, and add it to the model repository""" if model_repo is None: - model_repo = os.path.join( - os.path.dirname(__file__), - 'model_repository' - ) + model_repo = os.path.join(os.path.dirname(__file__), "model_repository") if output_format is None: - if model_type == 'xgboost': - output_format = 'xgboost' - elif model_type == 'lightgbm': - output_format = 'lightgbm' - elif model_type in {'sklearn', 'cuml'}: - output_format = 'pickle' + if model_type == "xgboost": + output_format = "xgboost" + elif model_type == "lightgbm": + output_format = "lightgbm" + elif model_type in {"sklearn", "cuml"}: + output_format = "pickle" else: raise RuntimeError('Unknown model type "{}"'.format(model_type)) if ( - ( - model_type == 'xgboost' and - output_format not in {'xgboost', 'xgboost_json'} - ) or ( - model_type == 'lightgbm' and - output_format not in {'lightgbm'} - ) or ( - model_type == 'sklearn' and - output_format not in {'pickle'} - ) or ( - model_type == 'cuml' and - output_format not in {'pickle'} - ) + (model_type == "xgboost" and output_format not in {"xgboost", "xgboost_json"}) + or (model_type == "lightgbm" and output_format not in {"lightgbm"}) + or (model_type == "sklearn" and output_format not in {"pickle"}) + or (model_type == "cuml" and output_format not in {"pickle"}) ): raise RuntimeError( f'Output format "{output_format}" inconsistent with model type' @@ -524,7 +480,7 @@ def build_model( model_name = f"{model_type}_{task}_{output_format}" config_dir = os.path.abspath(os.path.join(model_repo, model_name)) - model_dir = os.path.join(config_dir, '1') + model_dir = os.path.join(config_dir, "1") os.makedirs(model_dir, exist_ok=True) model = generate_model( @@ -535,7 +491,7 @@ def build_model( classes=classes, samples=samples, features=features, - cat_features=cat_features + cat_features=cat_features, ) serialize_model(model, model_dir, output_format=output_format) @@ -551,11 +507,11 @@ def build_model( task=task, threshold=classification_threshold, max_batch_size=max_batch_size, 
- storage_type=storage_type + storage_type=storage_type, ) - config_path = os.path.join(config_dir, 'config.pbtxt') + config_path = os.path.join(config_dir, "config.pbtxt") - with open(config_path, 'w') as config_file: + with open(config_path, "w") as config_file: config_file.write(config) return model_name @@ -565,128 +521,105 @@ def parse_args(): """Parse CLI arguments for model creation""" parser = argparse.ArgumentParser() parser.add_argument( - '--type', - choices=('lightgbm', 'lightgbm_rf', 'xgboost', 'sklearn', 'cuml'), - default='xgboost', - help='type of model', + "--type", + choices=("lightgbm", "lightgbm_rf", "xgboost", "sklearn", "cuml"), + default="xgboost", + help="type of model", ) parser.add_argument( - '--instance_kind', - choices=('gpu', 'cpu'), - default='gpu', - help='Whether to use GPU or CPU for prediction', + "--instance_kind", + choices=("gpu", "cpu"), + default="gpu", + help="Whether to use GPU or CPU for prediction", ) parser.add_argument( - '--task', - choices=('classification', 'regression'), - default='classification', - help='whether model should perform classification or regression', + "--task", + choices=("classification", "regression"), + default="classification", + help="whether model should perform classification or regression", ) parser.add_argument( - '--format', - choices=('xgboost', 'xgboost_json', 'lightgbm', 'pickle'), + "--format", + choices=("xgboost", "xgboost_json", "lightgbm", "pickle"), default=None, - help='serialization format for model', + help="serialization format for model", ) + parser.add_argument("--depth", type=int, help="maximum model depth", default=25) parser.add_argument( - '--depth', - type=int, - help='maximum model depth', - default=25 + "--trees", type=int, help="number of trees in model", default=100 ) parser.add_argument( - '--trees', - type=int, - help='number of trees in model', - default=100 + "--classes", type=int, help="for classifiers, the number of classes", default=2 ) parser.add_argument( - '--classes', - type=int, - help='for classifiers, the number of classes', - default=2 + "--features", type=int, help="number of features in data", default=32 ) parser.add_argument( - '--features', + "--cat_features", type=int, - help='number of features in data', - default=32 + help="number of categorical features (must be <= features)", + default=0, ) parser.add_argument( - '--cat_features', - type=int, - help='number of categorical features (must be <= features)', - default=0 + "--samples", type=int, help="number of training samples", default=1000 ) + parser.add_argument("--repo", help="path to model repository", default=None) + parser.add_argument("--name", help="name for model", default=None) parser.add_argument( - '--samples', - type=int, - help='number of training samples', - default=1000 - ) - parser.add_argument( - '--repo', - help='path to model repository', - default=None - ) - parser.add_argument( - '--name', - help='name for model', - default=None - ) - parser.add_argument( - '--threshold', + "--threshold", type=float, - help='for classifiers, the classification threshold', - default=0.5 + help="for classifiers, the classification threshold", + default=0.5, ) parser.add_argument( - '--predict_proba', - action='store_true', - help='for classifiers, output class scores', + "--predict_proba", + action="store_true", + help="for classifiers, output class scores", ) parser.add_argument( - '--disable_experimental_optimizations', - action='store_true', - help='for classifiers, output class scores', + 
"--disable_experimental_optimizations", + action="store_true", + help="for classifiers, output class scores", ) parser.add_argument( - '--max_batch_size', + "--max_batch_size", type=int, - help='largest batch size allowed for this model', - default=8192 + help="largest batch size allowed for this model", + default=8192, ) parser.add_argument( - '--storage_type', - choices=['AUTO', 'DENSE', 'SPARSE', 'SPARSE8'], - help='storage type used to load this model in FIL', - default='AUTO' + "--storage_type", + choices=["AUTO", "DENSE", "SPARSE", "SPARSE8"], + help="storage type used to load this model in FIL", + default="AUTO", ) return parser.parse_args() -if __name__ == '__main__': +if __name__ == "__main__": args = parse_args() - print(build_model( - task=args.task, - model_type=args.type, - instance_kind=args.instance_kind, - output_format=args.format, - depth=args.depth, - trees=args.trees, - classes=args.classes, - samples=args.samples, - features=args.features, - cat_features=args.cat_features, - model_repo=args.repo, - model_name=args.name, - classification_threshold=args.threshold, - predict_proba=args.predict_proba, - use_experimental_optimizations=( - not args.disable_experimental_optimizations - ), - max_batch_size=args.max_batch_size, - storage_type=args.storage_type - )) + print( + build_model( + task=args.task, + model_type=args.type, + instance_kind=args.instance_kind, + output_format=args.format, + depth=args.depth, + trees=args.trees, + classes=args.classes, + samples=args.samples, + features=args.features, + cat_features=args.cat_features, + model_repo=args.repo, + model_name=args.name, + classification_threshold=args.threshold, + predict_proba=args.predict_proba, + use_experimental_optimizations=( + not args.disable_experimental_optimizations + ), + max_batch_size=args.max_batch_size, + storage_type=args.storage_type, + ) + ) diff --git a/qa/L0_e2e/test_model.py b/qa/L0_e2e/test_model.py index e0e8fd2c..ecfc80d4 100644 --- a/qa/L0_e2e/test_model.py +++ b/qa/L0_e2e/test_model.py @@ -24,45 +24,49 @@ import numpy as np import pytest import treelite -from hypothesis import given, settings, assume, HealthCheck +import xgboost as xgb +from hypothesis import HealthCheck, assume, given, settings from hypothesis import strategies as st from hypothesis.extra.numpy import arrays as st_arrays from rapids_triton import Client -from rapids_triton.testing import get_random_seed, arrays_close -import xgboost as xgb +from rapids_triton.testing import arrays_close, get_random_seed TOTAL_SAMPLES = 20 MODELS = ( - 'xgboost', - 'xgboost_shap', - 'xgboost_json', - 'lightgbm', - 'lightgbm_rf', - 'regression', - 'sklearn', - 'cuml' + "xgboost", + "xgboost_shap", + "xgboost_json", + "lightgbm", + "lightgbm_rf", + "regression", + "sklearn", + "cuml", +) + +ModelData = namedtuple( + "ModelData", + ( + "name", + "input_shapes", + "output_sizes", + "max_batch_size", + "ground_truth_model", + "config", + ), ) -ModelData = namedtuple('ModelData', ( - 'name', - 'input_shapes', - 'output_sizes', - 'max_batch_size', - 'ground_truth_model', - 'config' -)) # TODO(wphicks): Replace with cache in 3.9 @lru_cache() def valid_shm_modes(): """Return a tuple of allowed shared memory modes""" modes = [None] - if os.environ.get('CPU_ONLY', 0) == 0: - modes.append('cuda') + if os.environ.get("CPU_ONLY", 0) == 0: + modes.append("cuda") return tuple(modes) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def client(): """A RAPIDS-Triton client for submitting inference requests""" client = Client() @@ 
-70,15 +74,15 @@ def client(): return client -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def model_repo(pytestconfig): """The path to the model repository directory""" - return pytestconfig.getoption('repo') + return pytestconfig.getoption("repo") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def skip_shap(pytestconfig): - return pytestconfig.getoption('no_shap') + return pytestconfig.getoption("no_shap") def get_model_parameter(config, param, default=None): @@ -94,7 +98,7 @@ class GTILModel: """A compatibility wrapper for executing models with GTIL""" def __init__(self, model_path, model_format, output_class): - if model_format == 'treelite_checkpoint': + if model_format == "treelite_checkpoint": self.tl_model = treelite.Model.deserialize(model_path) else: self.tl_model = treelite.Model.load(model_path, model_format) @@ -120,46 +124,46 @@ def predict(self, arr): class GroundTruthModel: """A reference model used for comparison against results returned from Triton""" + def __init__( - self, - name, - model_repo, - model_format, - predict_proba, - output_class, - use_cpu, - *, - model_version=1): - model_dir = os.path.join(model_repo, name, f'{model_version}') + self, + name, + model_repo, + model_format, + predict_proba, + output_class, + use_cpu, + *, + model_version=1, + ): + model_dir = os.path.join(model_repo, name, f"{model_version}") self.predict_proba = predict_proba self._run_treeshap = False - if model_format == 'xgboost': - model_path = os.path.join(model_dir, 'xgboost.model') - elif model_format == 'xgboost_json': - model_path = os.path.join(model_dir, 'xgboost.json') - elif model_format == 'lightgbm': - model_path = os.path.join(model_dir, 'model.txt') - elif model_format == 'treelite_checkpoint': + if model_format == "xgboost": + model_path = os.path.join(model_dir, "xgboost.model") + elif model_format == "xgboost_json": + model_path = os.path.join(model_dir, "xgboost.json") + elif model_format == "lightgbm": + model_path = os.path.join(model_dir, "model.txt") + elif model_format == "treelite_checkpoint": if use_cpu: - model_path = os.path.join(model_dir, 'checkpoint.tl') + model_path = os.path.join(model_dir, "checkpoint.tl") else: - model_path = os.path.join(model_dir, 'model.pkl') + model_path = os.path.join(model_dir, "model.pkl") else: - raise RuntimeError('Model format not recognized') + raise RuntimeError("Model format not recognized") - if name == 'xgboost_shap': + if name == "xgboost_shap": self._xgb_model = xgb.Booster() self._xgb_model.load_model(model_path) self._run_treeshap = True - + if use_cpu: - self._base_model = GTILModel( - model_path, model_format, output_class - ) + self._base_model = GTILModel(model_path, model_format, output_class) else: - if model_format == 'treelite_checkpoint': - with open(model_path, 'rb') as pkl_file: + if model_format == "treelite_checkpoint": + with open(model_path, "rb") as pkl_file: self._base_model = pickle.load(pkl_file) else: self._base_model = cuml.ForestInference.load( @@ -168,66 +172,58 @@ def __init__( def predict(self, inputs): if self.predict_proba: - result = self._base_model.predict_proba(inputs['input__0']) + result = self._base_model.predict_proba(inputs["input__0"]) else: - result = self._base_model.predict(inputs['input__0']) - output = {'output__0' : result} + result = self._base_model.predict(inputs["input__0"]) + output = {"output__0": result} if self._run_treeshap: - treeshap_result = \ - self._xgb_model.predict(xgb.DMatrix(inputs['input__0']), - 
pred_contribs=True) - output['treeshap_output'] = treeshap_result + treeshap_result = self._xgb_model.predict( + xgb.DMatrix(inputs["input__0"]), pred_contribs=True + ) + output["treeshap_output"] = treeshap_result return output -@pytest.fixture(scope='session', params=MODELS) +@pytest.fixture(scope="session", params=MODELS) def model_data(request, client, model_repo): """All data associated with a model required for generating examples and comparing with ground truth results""" name = request.param config = client.get_model_config(name) - input_shapes = { - input_.name: list(input_.dims) for input_ in config.input - } + input_shapes = {input_.name: list(input_.dims) for input_ in config.input} output_sizes = { - output.name: np.product(output.dims) * np.dtype('float32').itemsize + output.name: np.product(output.dims) * np.dtype("float32").itemsize for output in config.output } max_batch_size = config.max_batch_size - model_format = get_model_parameter( - config, 'model_type', default='xgboost' - ) - predict_proba = get_model_parameter( - config, 'predict_proba', default='false' - ) - predict_proba = (predict_proba == 'true') - output_class = get_model_parameter( - config, 'output_class', default='true' - ) - output_class = (output_class == 'true') + model_format = get_model_parameter(config, "model_type", default="xgboost") + predict_proba = get_model_parameter(config, "predict_proba", default="false") + predict_proba = predict_proba == "true" + output_class = get_model_parameter(config, "output_class", default="true") + output_class = output_class == "true" - use_cpu = (config.instance_group[0].kind != 1) + use_cpu = config.instance_group[0].kind != 1 ground_truth_model = GroundTruthModel( - name, model_repo, model_format, predict_proba, output_class, use_cpu, - model_version=1 + name, + model_repo, + model_format, + predict_proba, + output_class, + use_cpu, + model_version=1, ) return ModelData( - name, - input_shapes, - output_sizes, - max_batch_size, - ground_truth_model, - config + name, input_shapes, output_sizes, max_batch_size, ground_truth_model, config ) @given(hypothesis_data=st.data()) @settings( deadline=None, - suppress_health_check=(HealthCheck.too_slow, HealthCheck.filter_too_much) + suppress_health_check=(HealthCheck.too_slow, HealthCheck.filter_too_much), ) def test_small(client, model_data, hypothesis_data): """Test Triton-served model on many small Hypothesis-generated examples""" @@ -235,7 +231,7 @@ def test_small(client, model_data, hypothesis_data): total_output_sizes = {} all_triton_outputs = defaultdict(list) default_arrays = { - name: np.random.rand(TOTAL_SAMPLES, *shape).astype('float32') + name: np.random.rand(TOTAL_SAMPLES, *shape).astype("float32") for name, shape in model_data.input_shapes.items() } @@ -243,24 +239,26 @@ def test_small(client, model_data, hypothesis_data): model_inputs = { name: hypothesis_data.draw( st.one_of( - st.just(default_arrays[name][i:i+1, :]), - st_arrays('float32', [1] + shape) + st.just(default_arrays[name][i : i + 1, :]), + st_arrays("float32", [1] + shape), ) - ) for name, shape in model_data.input_shapes.items() + ) + for name, shape in model_data.input_shapes.items() } - if model_data.name == 'sklearn' or model_data.name == 'xgboost_shap': + if model_data.name == "sklearn" or model_data.name == "xgboost_shap": for array in model_inputs.values(): assume(not np.any(np.isnan(array))) model_output_sizes = { - name: size - for name, size in model_data.output_sizes.items() + name: size for name, size in 
model_data.output_sizes.items() } - shared_mem = hypothesis_data.draw(st.one_of( - st.just(mode) for mode in valid_shm_modes() - )) + shared_mem = hypothesis_data.draw( + st.one_of(st.just(mode) for mode in valid_shm_modes()) + ) result = client.predict( - model_data.name, model_inputs, model_data.output_sizes, - shared_mem=shared_mem + model_data.name, + model_inputs, + model_data.output_sizes, + shared_mem=shared_mem, ) for name, input_ in model_inputs.items(): all_model_inputs[name].append(input_) @@ -270,12 +268,10 @@ def test_small(client, model_data, hypothesis_data): all_triton_outputs[name].append(output) all_model_inputs = { - name: np.concatenate(arrays) - for name, arrays in all_model_inputs.items() + name: np.concatenate(arrays) for name, arrays in all_model_inputs.items() } all_triton_outputs = { - name: np.concatenate(arrays) - for name, arrays in all_triton_outputs.items() + name: np.concatenate(arrays) for name, arrays in all_triton_outputs.items() } try: @@ -290,7 +286,7 @@ def test_small(client, model_data, hypothesis_data): ground_truth[output_name], rtol=1e-3, atol=1e-2, - assert_close=True + assert_close=True, ) else: arrays_close( @@ -298,16 +294,15 @@ def test_small(client, model_data, hypothesis_data): ground_truth[output_name], atol=0.1, total_atol=3, - assert_close=True + assert_close=True, ) # Test entire batch of Hypothesis-generated inputs at once - shared_mem = hypothesis_data.draw(st.one_of( - st.just(mode) for mode in valid_shm_modes() - )) + shared_mem = hypothesis_data.draw( + st.one_of(st.just(mode) for mode in valid_shm_modes()) + ) all_triton_outputs = client.predict( - model_data.name, all_model_inputs, total_output_sizes, - shared_mem=shared_mem + model_data.name, all_model_inputs, total_output_sizes, shared_mem=shared_mem ) for output_name in sorted(ground_truth.keys()): @@ -317,7 +312,7 @@ def test_small(client, model_data, hypothesis_data): ground_truth[output_name], rtol=1e-3, atol=1e-2, - assert_close=True + assert_close=True, ) else: arrays_close( @@ -325,7 +320,7 @@ def test_small(client, model_data, hypothesis_data): ground_truth[output_name], atol=0.1, total_atol=3, - assert_close=True + assert_close=True, ) @@ -333,7 +328,7 @@ def test_small(client, model_data, hypothesis_data): def test_max_batch(client, model_data, shared_mem): """Test processing of a single maximum-sized batch""" max_inputs = { - name: np.random.rand(model_data.max_batch_size, *shape).astype('float32') + name: np.random.rand(model_data.max_batch_size, *shape).astype("float32") for name, shape in model_data.input_shapes.items() } model_output_sizes = { @@ -354,7 +349,7 @@ def test_max_batch(client, model_data, shared_mem): ground_truth[output_name], rtol=1e-3, atol=1e-2, - assert_close=True + assert_close=True, ) else: arrays_close( @@ -362,5 +357,5 @@ def test_max_batch(client, model_data, shared_mem): ground_truth[output_name], atol=0.1, total_rtol=3, - assert_close=True + assert_close=True, ) diff --git a/qa/benchmark_repo/large_model-cpu/config.pbtxt b/qa/benchmark_repo/large_model-cpu/config.pbtxt index 4b9d2a6b..1c608155 100644 --- a/qa/benchmark_repo/large_model-cpu/config.pbtxt +++ b/qa/benchmark_repo/large_model-cpu/config.pbtxt @@ -1,11 +1,11 @@ backend: "fil" max_batch_size: 6329 -input [ - { +input [ + { name: "input__0" data_type: TYPE_FP32 - dims: [ 393 ] - } + dims: [ 393 ] + } ] output [ { diff --git a/qa/benchmark_repo/large_model/config.pbtxt b/qa/benchmark_repo/large_model/config.pbtxt index 081e63bb..5513b4e5 100644 --- 
a/qa/benchmark_repo/large_model/config.pbtxt +++ b/qa/benchmark_repo/large_model/config.pbtxt @@ -1,11 +1,11 @@ backend: "fil" max_batch_size: 6329 -input [ - { +input [ + { name: "input__0" data_type: TYPE_FP32 - dims: [ 393 ] - } + dims: [ 393 ] + } ] output [ { diff --git a/qa/benchmark_repo/small_model-cpu/config.pbtxt b/qa/benchmark_repo/small_model-cpu/config.pbtxt index 4b9d2a6b..1c608155 100644 --- a/qa/benchmark_repo/small_model-cpu/config.pbtxt +++ b/qa/benchmark_repo/small_model-cpu/config.pbtxt @@ -1,11 +1,11 @@ backend: "fil" max_batch_size: 6329 -input [ - { +input [ + { name: "input__0" data_type: TYPE_FP32 - dims: [ 393 ] - } + dims: [ 393 ] + } ] output [ { diff --git a/qa/benchmark_repo/small_model/config.pbtxt b/qa/benchmark_repo/small_model/config.pbtxt index 081e63bb..5513b4e5 100644 --- a/qa/benchmark_repo/small_model/config.pbtxt +++ b/qa/benchmark_repo/small_model/config.pbtxt @@ -1,11 +1,11 @@ backend: "fil" max_batch_size: 6329 -input [ - { +input [ + { name: "input__0" data_type: TYPE_FP32 - dims: [ 393 ] - } + dims: [ 393 ] + } ] output [ { diff --git a/qa/collate_benchmarks.py b/qa/collate_benchmarks.py old mode 100644 new mode 100755 index 6df3d74b..8131b108 --- a/qa/collate_benchmarks.py +++ b/qa/collate_benchmarks.py @@ -5,7 +5,6 @@ import cudf import numpy as np - from scipy.spatial import ConvexHull try: @@ -13,8 +12,9 @@ except ImportError: plt = None -BATCH_FILE_RE = re.compile(r'([0-9]+)\.csv') -SUMMARY_DIR_NAME = 'summary' +BATCH_FILE_RE = re.compile(r"([0-9]+)\.csv") +SUMMARY_DIR_NAME = "summary" + def gather_perf_reports(benchmark_dir): _, model_dirs, _ = next(os.walk(benchmark_dir)) @@ -33,20 +33,19 @@ def collate_raw_data(benchmark_dir): all_data = [] for model, batch, data in gather_perf_reports(benchmark_dir): annotations = cudf.DataFrame( - { - 'Model': [model] * data.shape[0], - 'Batch Size': [batch] * data.shape[0] - }, - columns=('Model', 'Batch Size') + {"Model": [model] * data.shape[0], "Batch Size": [batch] * data.shape[0]}, + columns=("Model", "Batch Size"), ) all_data.append(cudf.concat([annotations, data], axis=1)) return cudf.concat(all_data, axis=0, ignore_index=True) + def pts_to_line(pt1, pt2): slope = (pt2[1] - pt1[1]) / (pt2[0] - pt1[0]) intercept = pt1[1] - slope * pt1[0] return (slope, intercept) + def scatter_to_hull(pts): hull = ConvexHull(pts) pts = pts[hull.vertices] @@ -57,58 +56,50 @@ def scatter_to_hull(pts): def plot_lat_tp(data, latency_percentile=99): - all_models = data['Model'].unique().to_pandas() - plt.xscale('log') - plt.yscale('log') + all_models = data["Model"].unique().to_pandas() + plt.xscale("log") + plt.yscale("log") for model in all_models: - model_data = raw_data.loc[data['Model'] == model].to_pandas() - hull = scatter_to_hull(model_data[ - [f'p{latency_percentile} latency', 'Inferences/Second'] - ].values) - plt.plot(hull[:, 0], hull[:, 1], '-', label=model) - plt.title('Throughput vs. Latency (log-log)') - plt.xlabel('p99 Latency (microseconds)') - plt.ylabel('Throughput (samples/s)') + model_data = raw_data.loc[data["Model"] == model].to_pandas() + hull = scatter_to_hull( + model_data[[f"p{latency_percentile} latency", "Inferences/Second"]].values + ) + plt.plot(hull[:, 0], hull[:, 1], "-", label=model) + plt.title("Throughput vs. 
Latency (log-log)") + plt.xlabel("p99 Latency (microseconds)") + plt.ylabel("Throughput (samples/s)") plt.legend(all_models) + def plot_througput(data, budget, output_dir): - filtered_data = data[data['p99 latency'] <= budget][ - ['Model', 'Inferences/Second'] - ] - maximums = filtered_data.groupby('Model').max() + filtered_data = data[data["p99 latency"] <= budget][["Model", "Inferences/Second"]] + maximums = filtered_data.groupby("Model").max() maximums.sort_index(inplace=True) budget_ms = round(budget / 1000) - raw_data.to_csv(os.path.join(output_dir, f'{budget_ms}.csv')) + raw_data.to_csv(os.path.join(output_dir, f"{budget_ms}.csv")) if plt is not None: - plt.bar( - maximums.index.values_host, - maximums['Inferences/Second'].values_host - ) + plt.bar(maximums.index.values_host, maximums["Inferences/Second"].values_host) plt.xticks(rotation=90) - plt.title( - f'Throughput for p99 latency budget of {budget_ms} ms' - ) + plt.title(f"Throughput for p99 latency budget of {budget_ms} ms") plt.subplots_adjust(bottom=0.35) - plt.savefig(os.path.join(output_dir, f'{budget_ms}.png')) + plt.savefig(os.path.join(output_dir, f"{budget_ms}.png")) plt.close() -if __name__ == '__main__': +if __name__ == "__main__": benchmark_dir = sys.argv[1] raw_data = collate_raw_data(benchmark_dir) summary_dir = os.path.join(benchmark_dir, SUMMARY_DIR_NAME) - throughput_dir = os.path.join(summary_dir, 'throughput') + throughput_dir = os.path.join(summary_dir, "throughput") os.makedirs(throughput_dir, exist_ok=True) raw_data.to_csv(os.path.join(summary_dir, "raw_data.csv")) try: - latency_cutoff = float(os.environ['MAX_LATENCY']) - raw_data = raw_data[ - raw_data['p99 latency'] <= (latency_cutoff * 1000) - ] + latency_cutoff = float(os.environ["MAX_LATENCY"]) + raw_data = raw_data[raw_data["p99 latency"] <= (latency_cutoff * 1000)] except KeyError: pass # No latency cutoff specified @@ -120,4 +111,4 @@ def plot_througput(data, budget, output_dir): if plt is not None: plot_lat_tp(raw_data) - plt.savefig(os.path.join(summary_dir, 'latency_throughput.png')) + plt.savefig(os.path.join(summary_dir, "latency_throughput.png")) diff --git a/qa/run-clang-format.py b/qa/run-clang-format.py index 9e7fa6c9..d64ef4e2 100755 --- a/qa/run-clang-format.py +++ b/qa/run-clang-format.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +#!/usr/bin/env python3 +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -9,7 +10,7 @@ # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and +# See the License for the specific languapge governing permissions and # limitations under the License. # # Note: This file was taken directly from @@ -17,14 +18,14 @@ # with minor modifications. 
from __future__ import print_function -import sys -import re + +import argparse import os +import re import subprocess -import argparse +import sys import tempfile - EXPECTED_VERSION = "11.1.0" VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)") # NOTE: populate this list with more top-level dirs as we add more of them to @@ -34,23 +35,41 @@ def parse_args(): argparser = argparse.ArgumentParser("Runs clang-format on a project") - argparser.add_argument("-dstdir", type=str, default=None, - help="Directory to store the temporary outputs of" - " clang-format. If nothing is passed for this, then" - " a temporary dir will be created using `mkdtemp`") - argparser.add_argument("-exe", type=str, default="clang-format", - help="Path to clang-format exe") - argparser.add_argument("-inplace", default=False, action="store_true", - help="Replace the source files itself.") - argparser.add_argument("-regex", type=str, - default=r"[.](cu|cuh|h|hpp|cpp)$", - help="Regex string to filter in sources") - argparser.add_argument("-ignore", type=str, default=r"cannylab/bh[.]cu$", - help="Regex used to ignore files from matched list") - argparser.add_argument("-v", dest="verbose", action="store_true", - help="Print verbose messages") - argparser.add_argument("dirs", type=str, nargs="*", - help="List of dirs where to find sources") + argparser.add_argument( + "-dstdir", + type=str, + default=None, + help="Directory to store the temporary outputs of" + " clang-format. If nothing is passed for this, then" + " a temporary dir will be created using `mkdtemp`", + ) + argparser.add_argument( + "-exe", type=str, default="clang-format", help="Path to clang-format exe" + ) + argparser.add_argument( + "-inplace", + default=False, + action="store_true", + help="Replace the source files in place.", + ) + argparser.add_argument( + "-regex", + type=str, + default=r"[.](cu|cuh|h|hpp|cpp)$", + help="Regex string to filter in sources", + ) + argparser.add_argument( + "-ignore", + type=str, + default=r"cannylab/bh[.]cu$", + help="Regex used to ignore files from matched list", + ) + argparser.add_argument( + "-v", dest="verbose", action="store_true", help="Print verbose messages" + ) + argparser.add_argument( + "dirs", type=str, nargs="*", help="List of dirs where to find sources" + ) args = argparser.parse_args() args.regex_compiled = re.compile(args.regex) args.ignore_compiled = re.compile(args.ignore) @@ -125,8 +144,9 @@ def main(): if not os.path.exists(".git"): print("Error!! This needs to always be run from the root of repo") sys.exit(-1) - all_files = list_all_src_files(args.regex_compiled, args.ignore_compiled, - args.dirs, args.dstdir, args.inplace) + all_files = list_all_src_files( + args.regex_compiled, args.ignore_compiled, args.dirs, args.dstdir, args.inplace + ) # actual format checker status = True for src, dst in all_files: @@ -138,9 +158,7 @@ def main(): print(" 2. 
Or run the below command to bulk-fix all these at once") print("Bulk-fix command: ") print( - " python qa/run-clang-format.py {} -inplace".format( - " ".join(sys.argv[1:]) - ) + " python qa/run-clang-format.py {} -inplace".format(" ".join(sys.argv[1:])) ) sys.exit(-1) return diff --git a/qa/run_tests.sh b/qa/run_tests.sh index 62919f9a..6f5b6ee4 100755 --- a/qa/run_tests.sh +++ b/qa/run_tests.sh @@ -104,7 +104,7 @@ then pytest \ --repo "${MODEL_REPO}" \ --hypothesis-profile "$TEST_PROFILE" \ - "$QA_DIR" + "$QA_DIR" else pytest --repo "${MODEL_REPO}" "$QA_DIR" --hypothesis-profile "$TEST_PROFILE" fi diff --git a/scripts/convert_cuml.py b/scripts/convert_cuml.py index 8e3579c2..7ae16446 100755 --- a/scripts/convert_cuml.py +++ b/scripts/convert_cuml.py @@ -13,28 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -'''cuML RF to Treelite checkpoint converter +"""cuML RF to Treelite checkpoint converter Given a path to a pickle file containing a cuML random forest model, this script will generate a Treelite checkpoint file representation of the model in the same directory. -''' +""" import argparse import os import pickle -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - 'pickle_file', help='Path to the pickle file to convert' - ) + parser.add_argument("pickle_file", help="Path to the pickle file to convert") args = parser.parse_args() - with open(args.pickle_file, 'rb') as file_: + with open(args.pickle_file, "rb") as file_: model = pickle.load(file_) model_dir = os.path.dirname(args.pickle_file) - out_path = os.path.join(model_dir, 'checkpoint.tl') + out_path = os.path.join(model_dir, "checkpoint.tl") model.convert_to_treelite_model().to_treelite_checkpoint(out_path) diff --git a/src/api.cc b/src/api.cc index 7e62aa63..8a374a6c 100644 --- a/src/api.cc +++ b/src/api.cc @@ -31,9 +31,7 @@ #include #include -namespace triton { -namespace backend { -namespace NAMESPACE { +namespace triton { namespace backend { namespace NAMESPACE { using ModelState = rapids::TritonModelState; using ModelInstanceState = @@ -43,38 +41,46 @@ extern "C" { /** Confirm that backend is compatible with Triton's backend API version */ -TRITONSERVER_Error* TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) { +TRITONSERVER_Error* +TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) +{ return rapids::triton_api::initialize(backend); } -TRITONSERVER_Error* TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) { +TRITONSERVER_Error* +TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) +{ return rapids::triton_api::model_initialize(model); } -TRITONSERVER_Error* TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) { +TRITONSERVER_Error* +TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) +{ return rapids::triton_api::model_finalize(model); } -TRITONSERVER_Error* TRITONBACKEND_ModelInstanceInitialize( - TRITONBACKEND_ModelInstance* instance) { - return rapids::triton_api::instance_initialize(instance); +TRITONSERVER_Error* +TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) +{ + return rapids::triton_api::instance_initialize< + ModelState, ModelInstanceState>(instance); } -TRITONSERVER_Error* TRITONBACKEND_ModelInstanceFinalize( - TRITONBACKEND_ModelInstance* instance) { +TRITONSERVER_Error* +TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) +{ return rapids::triton_api::instance_finalize(instance); } -TRITONSERVER_Error* 
TRITONBACKEND_ModelInstanceExecute( +TRITONSERVER_Error* +TRITONBACKEND_ModelInstanceExecute( TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** raw_requests, - uint32_t const request_count) { + uint32_t const request_count) +{ return rapids::triton_api::execute( instance, raw_requests, static_cast(request_count)); } } // extern "C" -} // namespace NAMESPACE -} // namespace backend -} // namespace triton +}}} // namespace triton::backend::NAMESPACE diff --git a/src/cpu_treeshap_model.h b/src/cpu_treeshap_model.h index 804de42e..921d3b97 100644 --- a/src/cpu_treeshap_model.h +++ b/src/cpu_treeshap_model.h @@ -349,32 +349,35 @@ struct TreeShapModel { std::size_t n_rows, std::size_t n_cols) const { thread_count nthread(tl_model_->config().cpu_nthread); - std::visit([&](const auto& info) { + std::visit( + [&](const auto& info) { #pragma omp parallel for num_threads(static_cast(nthread)) - for (auto i = 0; i < n_rows; i++) { - for (auto info_idx = 0; info_idx < info.size(); info_idx++) { - // One class per tree - auto output_offset = - output.data() + - (i * num_class_ + info[info_idx].class_idx) * (n_cols + 1); - linear_treeshap( - info[info_idx], output_offset, input.data() + i * n_cols, n_cols); - } - } - // Scale output - auto scale = 1.0f / average_factor_; - for (auto i = 0; i < output.size(); i++) { - output.data()[i] = output.data()[i] * scale; - } - // Add global bias to bias column - for (auto i = 0; i < n_rows; i++) { - for (auto class_idx = 0; class_idx < num_class_; class_idx++) { - auto output_offset = - (i * num_class_ + class_idx) * (n_cols + 1) + n_cols; - output.data()[output_offset] += global_bias_; - } - } - }, meta_infos_); + for (auto i = 0; i < n_rows; i++) { + for (auto info_idx = 0; info_idx < info.size(); info_idx++) { + // One class per tree + auto output_offset = + output.data() + + (i * num_class_ + info[info_idx].class_idx) * (n_cols + 1); + linear_treeshap( + info[info_idx], output_offset, input.data() + i * n_cols, + n_cols); + } + } + // Scale output + auto scale = 1.0f / average_factor_; + for (auto i = 0; i < output.size(); i++) { + output.data()[i] = output.data()[i] * scale; + } + // Add global bias to bias column + for (auto i = 0; i < n_rows; i++) { + for (auto class_idx = 0; class_idx < num_class_; class_idx++) { + auto output_offset = + (i * num_class_ + class_idx) * (n_cols + 1) + n_cols; + output.data()[output_offset] += global_bias_; + } + } + }, + meta_infos_); } std::shared_ptr tl_model_; int num_class_; diff --git a/src/fil_config.h b/src/fil_config.h index 95698366..63c59c07 100644 --- a/src/fil_config.h +++ b/src/fil_config.h @@ -82,8 +82,7 @@ tl_to_fil_config(treelite_config const& tl_config) tl_config.threads_per_tree, 0, nullptr, - ML::fil::precision_t::PRECISION_FLOAT32 - }; + ML::fil::precision_t::PRECISION_FLOAT32}; } }}} // namespace triton::backend::NAMESPACE diff --git a/src/gpu_forest_model.h b/src/gpu_forest_model.h index dd9179ab..0a641ebc 100644 --- a/src/gpu_forest_model.h +++ b/src/gpu_forest_model.h @@ -48,11 +48,11 @@ struct ForestModel { raft_handle_, &variant_result, tl_model_->handle(), &config); try { result = std::get(variant_result); - } catch (std::bad_variant_access const& err) { + } + catch (std::bad_variant_access const& err) { throw rapids::TritonException( - rapids::Error::Internal, - "Model did not load with expected precision" - ); + rapids::Error::Internal, + "Model did not load with expected precision"); } return result; }()} diff --git a/src/gpu_treeshap_model.h b/src/gpu_treeshap_model.h index 
154767ed..0c95e8c3 100644 --- a/src/gpu_treeshap_model.h +++ b/src/gpu_treeshap_model.h @@ -53,13 +53,9 @@ struct TreeShapModel { // take a stream on its API input.stream_synchronize(); ML::Explainer::gpu_treeshap( - path_info_, - ML::Explainer::FloatPointer(const_cast(input.data())), - n_rows, - n_cols, - ML::Explainer::FloatPointer(output.data()), - output.size() - ); + path_info_, + ML::Explainer::FloatPointer(const_cast(input.data())), n_rows, + n_cols, ML::Explainer::FloatPointer(output.data()), output.size()); output.stream_synchronize(); } diff --git a/src/herring/model.hpp b/src/herring/model.hpp index c988cd39..83c97105 100644 --- a/src/herring/model.hpp +++ b/src/herring/model.hpp @@ -16,289 +16,302 @@ #pragma once -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace herring { #ifdef __cpp_lib_hardware_interference_size - using std::hardware_constructive_interference_size; +using std::hardware_constructive_interference_size; #else - auto constexpr hardware_constructive_interference_size = std::size_t{64}; +auto constexpr hardware_constructive_interference_size = std::size_t{64}; #endif - template - struct simple_model { - using simple_tree_t = simple_tree; - using lookup_tree_t = lookup_tree; - using tree_type = std::conditional_t< - std::is_same_v || std::is_same_v, - simple_tree_t, - lookup_tree_t - >; - using sum_elem_type = typename is_container_specialization::value_type; +template < + typename value_t, typename feature_index_t, typename offset_t, + typename output_index_t, typename output_t> +struct simple_model { + using simple_tree_t = + simple_tree; + using lookup_tree_t = + lookup_tree; + using tree_type = std::conditional_t< + std::is_same_v || + std::is_same_v, + simple_tree_t, lookup_tree_t>; + using sum_elem_type = + typename is_container_specialization::value_type; - std::vector trees; - std::size_t num_class; - std::size_t num_feature; - row_op row_postproc; - float average_factor; - float bias; - float postproc_constant; - std::vector mutable row_has_missing; - bool use_inclusive_threshold; - bool has_categorical_trees; + std::vector trees; + std::size_t num_class; + std::size_t num_feature; + row_op row_postproc; + float average_factor; + float bias; + float postproc_constant; + std::vector mutable row_has_missing; + bool use_inclusive_threshold; + bool has_categorical_trees; - void predict(float const* input, std::size_t num_row, float* output, thread_count nthread) const { - // This dispatch structure is designed to determine as early as possible - // whether a "slow" path is required and to convert booleans that - // determine slow/fast path execution to compile-time constants such that - // the compiled fast path never has to make checks required by the slow - // path. Within the implementation of predict_, there is further - // dispatching of the same sort to allow subsets of the model to use as - // much fast-path coode as possible. - // - // TODO (wphicks): Much of this could be cleaned up with some template - // metaprogramming and a few helper functions for switching to various - // compile-time paths based on runtime boolean values. 
- // (https://github.com/triton-inference-server/fil_backend/issues/205) - // - if (!precompute_missing(input, num_row)) { - if (!use_inclusive_threshold) { - if (!has_categorical_trees) { - predict_(input, output, num_row, nthread); - } else { - predict_(input, output, num_row, nthread); - } + void predict( + float const* input, std::size_t num_row, float* output, + thread_count nthread) const + { + // This dispatch structure is designed to determine as early as possible + // whether a "slow" path is required and to convert booleans that + // determine slow/fast path execution to compile-time constants such that + // the compiled fast path never has to make checks required by the slow + // path. Within the implementation of predict_, there is further + // dispatching of the same sort to allow subsets of the model to use as + // much fast-path code as possible. + // + // TODO (wphicks): Much of this could be cleaned up with some template + // metaprogramming and a few helper functions for switching to various + // compile-time paths based on runtime boolean values. + // (https://github.com/triton-inference-server/fil_backend/issues/205) + // + if (!precompute_missing(input, num_row)) { + if (!use_inclusive_threshold) { + if (!has_categorical_trees) { + predict_(input, output, num_row, nthread); } else { - if (!has_categorical_trees) { - predict_(input, output, num_row, nthread); - } else { - predict_(input, output, num_row, nthread); - } + predict_(input, output, num_row, nthread); } } else { - if (!use_inclusive_threshold) { - if (!has_categorical_trees) { - predict_(input, output, num_row, nthread); - } else { - predict_(input, output, num_row, nthread); - } + if (!has_categorical_trees) { + predict_(input, output, num_row, nthread); } else { - if (!has_categorical_trees) { - predict_(input, output, num_row, nthread); - } else { - predict_(input, output, num_row, nthread); - } + predict_(input, output, num_row, nthread); } } - } - - void set_element_postproc(element_op element_postproc) { - postprocess_element = [this, element_postproc]() -> std::function { - auto constant = postproc_constant; - switch(element_postproc) { - case element_op::signed_square: - return [](sum_elem_type elem, float* out) { - *out = std::copysign(elem * elem, elem); - }; - case element_op::hinge: - return [](sum_elem_type elem, float* out) { - *out = elem > sum_elem_type{} ? 
sum_elem_type{1} : sum_elem_type{0}; - }; - case element_op::sigmoid: - return [constant](sum_elem_type elem, float* out) { - *out = sum_elem_type{1} / (sum_elem_type{1} + std::exp(-constant * elem)); - }; - case element_op::exponential: - return [](sum_elem_type elem, float* out) { - *out = std::exp(elem); - }; - case element_op::exponential_standard_ratio: - return [constant](sum_elem_type elem, float* out) { - *out = std::exp(-elem / constant); - }; - case element_op::logarithm_one_plus_exp: - return [](sum_elem_type elem, float* out) { - *out = std::log1p(std::exp(elem)); - }; - default: - return [](sum_elem_type elem, float* out) { - *out = elem; - }; + } else { + if (!use_inclusive_threshold) { + if (!has_categorical_trees) { + predict_(input, output, num_row, nthread); + } else { + predict_(input, output, num_row, nthread); + } + } else { + if (!has_categorical_trees) { + predict_(input, output, num_row, nthread); + } else { + predict_(input, output, num_row, nthread); } - }(); + } } + } - private: - std::function postprocess_element; - - auto precompute_missing(float const* input, std::size_t num_row) const { - auto result = false; - if (num_row > row_has_missing.size()) { - row_has_missing.resize(num_row); + void set_element_postproc(element_op element_postproc) + { + postprocess_element = + [this, + element_postproc]() -> std::function { + auto constant = postproc_constant; + switch (element_postproc) { + case element_op::signed_square: + return [](sum_elem_type elem, float* out) { + *out = std::copysign(elem * elem, elem); + }; + case element_op::hinge: + return [](sum_elem_type elem, float* out) { + *out = elem > sum_elem_type{} ? sum_elem_type{1} : sum_elem_type{0}; + }; + case element_op::sigmoid: + return [constant](sum_elem_type elem, float* out) { + *out = sum_elem_type{1} / + (sum_elem_type{1} + std::exp(-constant * elem)); + }; + case element_op::exponential: + return [](sum_elem_type elem, float* out) { *out = std::exp(elem); }; + case element_op::exponential_standard_ratio: + return [constant](sum_elem_type elem, float* out) { + *out = std::exp(-elem / constant); + }; + case element_op::logarithm_one_plus_exp: + return [](sum_elem_type elem, float* out) { + *out = std::log1p(std::exp(elem)); + }; + default: + return [](sum_elem_type elem, float* out) { *out = elem; }; } - for (auto row_index = std::size_t{}; row_index < num_row; ++row_index) { - row_has_missing[row_index] = std::transform_reduce( + }(); + } + + private: + std::function postprocess_element; + + auto precompute_missing(float const* input, std::size_t num_row) const + { + auto result = false; + if (num_row > row_has_missing.size()) { + row_has_missing.resize(num_row); + } + for (auto row_index = std::size_t{}; row_index < num_row; ++row_index) { + row_has_missing[row_index] = std::transform_reduce( input + row_index * num_feature, - input + (row_index + 1) * num_feature, - false, - std::logical_or<>(), - [](auto val) { return std::isnan(val); } - ); - result = result || row_has_missing[row_index]; - } - return result; + input + (row_index + 1) * num_feature, false, std::logical_or<>(), + [](auto val) { return std::isnan(val); }); + result = result || row_has_missing[row_index]; } + return result; + } - void apply_postprocessing( - std::vector const& grove_sum, - float* output, - std::size_t num_row, - std::size_t num_grove, - thread_count nthread) const { - - if (row_postproc != row_op::max_index) { + void apply_postprocessing( + std::vector const& grove_sum, float* output, + std::size_t num_row, 
std::size_t num_grove, + thread_count nthread) const + { + if (row_postproc != row_op::max_index) { #pragma omp parallel for num_threads(static_cast(nthread)) - for (auto row_index = std::size_t{}; row_index < num_row; ++row_index) { - auto const grove_output_index = row_index * num_class * num_grove; - for (auto class_index = std::size_t{}; class_index < num_class; ++class_index) { - auto const class_output_index = grove_output_index + class_index * num_grove; - auto const grove_sum_begin = std::begin(grove_sum); - postprocess_element(std::reduce( - grove_sum_begin + class_output_index, - grove_sum_begin + class_output_index + num_grove, - bias * average_factor - ) / average_factor, output + row_index * num_class + class_index); - } - if (row_postproc == row_op::softmax) { - auto const row_begin = output + row_index * num_class; - auto const row_end = row_begin + num_class; - auto const max_value = *std::max_element(row_begin, row_end); - std::transform( - row_begin, - row_end, - row_begin, - [&max_value](auto const& val) { return std::exp(val - max_value); } - ); - auto const normalization = std::reduce(row_begin, row_end); - std::transform( - row_begin, - row_end, - output + row_index * num_class, + for (auto row_index = std::size_t{}; row_index < num_row; ++row_index) { + auto const grove_output_index = row_index * num_class * num_grove; + for (auto class_index = std::size_t{}; class_index < num_class; + ++class_index) { + auto const class_output_index = + grove_output_index + class_index * num_grove; + auto const grove_sum_begin = std::begin(grove_sum); + postprocess_element( + std::reduce( + grove_sum_begin + class_output_index, + grove_sum_begin + class_output_index + num_grove, + bias * average_factor) / + average_factor, + output + row_index * num_class + class_index); + } + if (row_postproc == row_op::softmax) { + auto const row_begin = output + row_index * num_class; + auto const row_end = row_begin + num_class; + auto const max_value = *std::max_element(row_begin, row_end); + std::transform( + row_begin, row_end, row_begin, [&max_value](auto const& val) { + return std::exp(val - max_value); + }); + auto const normalization = std::reduce(row_begin, row_end); + std::transform( + row_begin, row_end, output + row_index * num_class, [&normalization](auto const& val) { return val / normalization; - } - ); - } + }); } - } else { + } + } else { #pragma omp parallel for num_threads(static_cast(nthread)) - for (auto row_index = std::size_t{}; row_index < num_row; ++row_index) { - auto grove_output_index = row_index * num_class * num_grove; - auto row_output = std::vector(num_class, 0); + for (auto row_index = std::size_t{}; row_index < num_row; ++row_index) { + auto grove_output_index = row_index * num_class * num_grove; + auto row_output = std::vector(num_class, 0); - for (auto class_index = std::size_t{}; class_index < num_class; ++class_index) { - auto class_output_index = grove_output_index + class_index * num_grove; - auto grove_sum_begin = std::begin(grove_sum); - postprocess_element(std::reduce( - grove_sum_begin + class_output_index, - grove_sum_begin + class_output_index + num_grove, - bias * average_factor - ) / average_factor, &(row_output[class_index])); - } - output[row_index] = std::distance( - std::begin(row_output), - std::max_element(std::begin(row_output), std::end(row_output)) - ); + for (auto class_index = std::size_t{}; class_index < num_class; + ++class_index) { + auto class_output_index = + grove_output_index + class_index * num_grove; + auto grove_sum_begin = 
std::begin(grove_sum); + postprocess_element( + std::reduce( + grove_sum_begin + class_output_index, + grove_sum_begin + class_output_index + num_grove, + bias * average_factor) / + average_factor, + &(row_output[class_index])); } + output[row_index] = std::distance( + std::begin(row_output), + std::max_element(std::begin(row_output), std::end(row_output))); } } + } - template - void predict_(float const* input, float* output, std::size_t num_row, thread_count nthread) const { - // "Groves" are groups of trees which are processed together in a single - // thread. Similarly, "blocks" are groups of rows that are processed - // together + template < + bool missing_value_in_input, bool categorical_model, + bool inclusive_threshold> + void predict_( + float const* input, float* output, std::size_t num_row, + thread_count nthread) const + { + // "Groves" are groups of trees which are processed together in a single + // thread. Similarly, "blocks" are groups of rows that are processed + // together - // Align grove boundaries on cache lines - auto constexpr grove_size = hardware_constructive_interference_size; - // Align block boundaries on cache lines - auto constexpr block_size = hardware_constructive_interference_size; + // Align grove boundaries on cache lines + auto constexpr grove_size = hardware_constructive_interference_size; + // Align block boundaries on cache lines + auto constexpr block_size = hardware_constructive_interference_size; - auto const num_tree = trees.size(); - auto const num_grove = (num_tree / grove_size + (num_tree % grove_size != 0)); - auto const num_block = (num_row / block_size + (num_row % block_size != 0)); + auto const num_tree = trees.size(); + auto const num_grove = + (num_tree / grove_size + (num_tree % grove_size != 0)); + auto const num_block = (num_row / block_size + (num_row % block_size != 0)); - auto forest_sum = std::vector( - num_row * num_class * num_grove, - sum_elem_type{} - ); + auto forest_sum = std::vector( + num_row * num_class * num_grove, sum_elem_type{}); #pragma omp parallel for num_threads(static_cast(nthread)) - for (auto task_index = std::size_t{}; task_index < num_grove * num_block; ++task_index) { - auto const grove_index = task_index / num_block; - auto const block_index = task_index % num_block; - - auto const starting_row = block_index * block_size; - auto const max_row = std::min(starting_row + block_size, num_row); - for (auto row_index = starting_row; row_index < max_row; ++row_index) { + for (auto task_index = std::size_t{}; task_index < num_grove * num_block; + ++task_index) { + auto const grove_index = task_index / num_block; + auto const block_index = task_index % num_block; - auto const starting_tree = grove_index * grove_size; - auto const max_tree = std::min(starting_tree + grove_size, num_tree); - for (auto tree_index = starting_tree; tree_index < max_tree; ++tree_index) { - auto const& tree = trees[tree_index]; + auto const starting_row = block_index * block_size; + auto const max_row = std::min(starting_row + block_size, num_row); + for (auto row_index = starting_row; row_index < max_row; ++row_index) { + auto const starting_tree = grove_index * grove_size; + auto const max_tree = std::min(starting_tree + grove_size, num_tree); + for (auto tree_index = starting_tree; tree_index < max_tree; + ++tree_index) { + auto const& tree = trees[tree_index]; - // Find leaf node - auto node_index = std::size_t{}; - while (tree.nodes[node_index].distant_offset != 0) { - if constexpr (missing_value_in_input) { - if (not 
row_has_missing[row_index]) { - node_index += tree.template evaluate_tree_node(node_index, input + row_index * num_feature); - } else { - node_index += tree.template evaluate_tree_node(node_index, input + row_index * num_feature); - } + // Find leaf node + auto node_index = std::size_t{}; + while (tree.nodes[node_index].distant_offset != 0) { + if constexpr (missing_value_in_input) { + if (not row_has_missing[row_index]) { + node_index += tree.template evaluate_tree_node< + false, categorical_model, inclusive_threshold>( + node_index, input + row_index * num_feature); } else { - node_index += tree.template evaluate_tree_node(node_index, input + row_index * num_feature); + node_index += tree.template evaluate_tree_node< + true, categorical_model, inclusive_threshold>( + node_index, input + row_index * num_feature); } + } else { + node_index += tree.template evaluate_tree_node< + false, categorical_model, inclusive_threshold>( + node_index, input + row_index * num_feature); } + } - // Add leaf contribution to output - if constexpr (is_container_specialization::value) { - auto leaf_output = tree.get_leaf_value(node_index); - for(auto class_index = std::size_t{}; class_index < num_class; ++class_index){ - forest_sum[ - row_index * num_class * num_grove + - class_index * num_grove - + grove_index - ] += leaf_output[class_index]; - } - } else { - auto class_index = tree_index % num_class; - auto cur_index = row_index * num_class * num_grove + class_index * num_grove + grove_index; - forest_sum[ - row_index * num_class * num_grove + - class_index * num_grove - + grove_index - ] += tree.get_leaf_value(node_index); + // Add leaf contribution to output + if constexpr (is_container_specialization< + output_t, std::vector>::value) { + auto leaf_output = tree.get_leaf_value(node_index); + for (auto class_index = std::size_t{}; class_index < num_class; + ++class_index) { + forest_sum + [row_index * num_class * num_grove + class_index * num_grove + + grove_index] += leaf_output[class_index]; } - } // Trees in grove - } // Rows in block - } // Tasks (groves x blocks) + } else { + auto class_index = tree_index % num_class; + auto cur_index = row_index * num_class * num_grove + + class_index * num_grove + grove_index; + forest_sum + [row_index * num_class * num_grove + class_index * num_grove + + grove_index] += tree.get_leaf_value(node_index); + } + } // Trees in grove + } // Rows in block + } // Tasks (groves x blocks) - apply_postprocessing(forest_sum, output, num_row, num_grove, nthread); - } - }; -} + apply_postprocessing(forest_sum, output, num_row, num_grove, nthread); + } +}; +} // namespace herring diff --git a/src/herring/node.hpp b/src/herring/node.hpp index 4d3985b6..4a1ef1c4 100644 --- a/src/herring/node.hpp +++ b/src/herring/node.hpp @@ -19,73 +19,90 @@ #include namespace herring { - /* Summary of Types - * ---------------- - * value_t (float or double): The value used for testing a node condition or - * for providing the output of leaves. - * feature_index_t (std::uint16_t or std::uint32_t): Index indicating which - * feature this conditional applies to - * offset_t (std::uint16_t or std::uint32_t): Offset between this node and - * its distant child. For small trees, using a smaller type can reduce the - * padded size of the node to as few as 8 bytes. - * output_index_t (typically std::uint32_t): If leaf output values cannot be stored - * in the same memory as test condition values, this index provides a - * lookup location for output values stored in the tree. 
- */ - template - struct simple_node { - using value_type = value_t; // float or double - using index_type = feature_index_t; - using offset_type = offset_t; - using output_index_type = output_index_t; - using category_set_type = std::bitset; - // Cannot use std::variant here because it takes up 4 additional bytes when - // value_type is float - union value_or_index { - value_type value; - output_index_type index; - category_set_type categories; - }; - value_or_index value; // 4 bytes for float - offset_type distant_offset; // 2 bytes for depth < 16 or small trees; 4 otherwise - index_type feature; // 1-4 bytes, depending on number of features - - simple_node() : value{value_type{}}, distant_offset{}, feature{} {} +/* Summary of Types + * ---------------- + * value_t (float or double): The value used for testing a node condition or + * for providing the output of leaves. + * feature_index_t (std::uint16_t or std::uint32_t): Index indicating which + * feature this conditional applies to + * offset_t (std::uint16_t or std::uint32_t): Offset between this node and + * its distant child. For small trees, using a smaller type can reduce the + * padded size of the node to as few as 8 bytes. + * output_index_t (typically std::uint32_t): If leaf output values cannot be + * stored in the same memory as test condition values, this index provides a + * lookup location for output values stored in the tree. + */ +template < + typename value_t, typename feature_index_t, typename offset_t, + typename output_index_t> +struct simple_node { + using value_type = value_t; // float or double + using index_type = feature_index_t; + using offset_type = offset_t; + using output_index_type = output_index_t; + using category_set_type = + std::bitset; + // Cannot use std::variant here because it takes up 4 additional bytes when + // value_type is float + union value_or_index { + value_type value; + output_index_type index; + category_set_type categories; }; + value_or_index value; // 4 bytes for float + offset_type + distant_offset; // 2 bytes for depth < 16 or small trees; 4 otherwise + index_type feature; // 1-4 bytes, depending on number of features - template - auto evaluate_node(simple_node const& node, float feature_value) { - auto condition = false; - if constexpr (categorical) { - if (feature_value >= 0 && feature_value < node.value.categories.size()) { - // NOTE: This cast aligns with the convention used in LightGBM and - // other frameworks to cast floats when converting to integral - // categories. This can have surprising effects with floating point - // arithmetic, but it is kept this way for now in order to provide - // consistency with results obtained from the training frameworks. - condition = node.value.categories[static_cast(feature_value)]; - } + simple_node() : value{value_type{}}, distant_offset{}, feature{} {} +}; + +template < + bool categorical, bool inclusive_threshold, typename value_t, + typename feature_index_t, typename offset_t, typename output_index_t> +auto +evaluate_node( + simple_node const& node, + float feature_value) +{ + auto condition = false; + if constexpr (categorical) { + if (feature_value >= 0 && feature_value < node.value.categories.size()) { + // NOTE: This cast aligns with the convention used in LightGBM and + // other frameworks to cast floats when converting to integral + // categories. 
This can have surprising effects with floating point + // arithmetic, but it is kept this way for now in order to provide + // consistency with results obtained from the training frameworks. + condition = + node.value.categories[static_cast(feature_value)]; + } + } else { + if constexpr (inclusive_threshold) { + condition = (feature_value <= node.value.value); } else { - if constexpr (inclusive_threshold) { - condition = (feature_value <= node.value.value); - } else { - condition = (feature_value < node.value.value); - } + condition = (feature_value < node.value.value); } + } - // This narrowing conversion is guaranteed safe because distant_offset - // cannot be 0 - // TODO(wphicks): Guarantee this with custom types - // (https://github.com/triton-inference-server/fil_backend/issues/204) + // This narrowing conversion is guaranteed safe because distant_offset + // cannot be 0 + // TODO(wphicks): Guarantee this with custom types + // (https://github.com/triton-inference-server/fil_backend/issues/204) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wnarrowing" - return offset_t{1 + condition * (node.distant_offset - 1)}; + return offset_t{1 + condition * (node.distant_offset - 1)}; #pragma GCC diagnostic pop - } +} - template - auto evaluate_node(simple_node const& node, float const* row) { - auto feature_value = *(row + node.feature); - return evaluate_node(node, feature_value); - } +template < + bool categorical, bool inclusive_threshold, typename value_t, + typename feature_index_t, typename offset_t, typename output_index_t> +auto +evaluate_node( + simple_node const& node, + float const* row) +{ + auto feature_value = *(row + node.feature); + return evaluate_node(node, feature_value); } +} // namespace herring diff --git a/src/herring/omp_helpers.hpp b/src/herring/omp_helpers.hpp index 843fd6ac..4e66d6e3 100644 --- a/src/herring/omp_helpers.hpp +++ b/src/herring/omp_helpers.hpp @@ -20,16 +20,19 @@ template struct thread_count { thread_count() : value{omp_get_max_threads()} {} - thread_count(T t) : value{ - [](T t) { - auto result = T{t}; - auto max_count = omp_get_max_threads(); - if ( t < 1 || t > max_count) { - result = max_count; - } - return result; - }(t)} {} + thread_count(T t) + : value{[](T t) { + auto result = T{t}; + auto max_count = omp_get_max_threads(); + if (t < 1 || t > max_count) { + result = max_count; + } + return result; + }(t)} + { + } operator int() const { return static_cast(value); } + private: T value; }; diff --git a/src/herring/output_ops.hpp b/src/herring/output_ops.hpp index b04b1fb8..7704a579 100644 --- a/src/herring/output_ops.hpp +++ b/src/herring/output_ops.hpp @@ -18,21 +18,17 @@ namespace herring { - /* Enum representing possible element-wise operations on output */ - enum class element_op { - disable, - signed_square, - hinge, - sigmoid, - exponential, - exponential_standard_ratio, - logarithm_one_plus_exp - }; +/* Enum representing possible element-wise operations on output */ +enum class element_op { + disable, + signed_square, + hinge, + sigmoid, + exponential, + exponential_standard_ratio, + logarithm_one_plus_exp +}; - /* Enum representing possible row-wise operations on output */ - enum class row_op { - disable, - softmax, - max_index - }; -} +/* Enum representing possible row-wise operations on output */ +enum class row_op { disable, softmax, max_index }; +} // namespace herring diff --git a/src/herring/tl_helpers.hpp b/src/herring/tl_helpers.hpp index b595cfde..8c7760e9 100644 --- a/src/herring/tl_helpers.hpp +++ 
b/src/herring/tl_helpers.hpp @@ -16,32 +16,26 @@ #pragma once -#include -#include -#include -#include - #include +#include #include #include +#include +#include +#include namespace herring { struct unconvertible_model_exception : std::exception { - unconvertible_model_exception () : msg_{"Model could not be converted"} - { - } + unconvertible_model_exception() : msg_{"Model could not be converted"} {} - unconvertible_model_exception (std::string msg) : msg_{msg} - { - } + unconvertible_model_exception(std::string msg) : msg_{msg} {} - unconvertible_model_exception (char const* msg) : msg_{msg} - { - } + unconvertible_model_exception(char const* msg) : msg_{msg} {} virtual char const* what() const noexcept { return msg_.c_str(); } + private: std::string msg_; }; @@ -50,8 +44,12 @@ struct unconvertible_model_exception : std::exception { // has_categorical_trees parameters are changed as a side-effect of this // function. This is messy and confusing, and it should be fixed in a later // refactor https://github.com/triton-inference-server/fil_backend/issues/205 -template -auto convert_tree(treelite::Tree const& tl_tree, bool& use_inclusive_threshold, bool& categorical_model) { +template +auto +convert_tree( + treelite::Tree const& tl_tree, + bool& use_inclusive_threshold, bool& categorical_model) +{ auto result = tree_t{}; result.nodes.reserve(tl_tree.num_nodes); result.default_distant.reserve(tl_tree.num_nodes); @@ -67,7 +65,8 @@ auto convert_tree(treelite::Tree const& tl_tree, bo // Stack of TL node ids for DFS auto node_stack = std::stack>{}; - // Keep track of final location of parent in nodes vector for each node currently in stack + // Keep track of final location of parent in nodes vector for each node + // currently in stack auto parent_stack = std::stack>{}; // TODO(wphicks): Just store a reference to the parent directly rather than // an index @@ -86,16 +85,20 @@ auto convert_tree(treelite::Tree const& tl_tree, bo auto parent_index = parent_stack.top(); parent_stack.pop(); // Don't care if it overwrites; we always visit distant child last - result.nodes[parent_index].distant_offset = result.nodes.size() - parent_index; + result.nodes[parent_index].distant_offset = + result.nodes.size() - parent_index; } result.nodes.emplace_back(); auto& cur_node = result.nodes.back(); if (tl_tree.IsLeaf(cur_node_id)) { - cur_node.distant_offset = typename tree_t::node_type::offset_type{}; // 0 offset means no child + cur_node.distant_offset = + typename tree_t::node_type::offset_type{}; // 0 offset means no child - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v< + typename tree_t::output_type, + decltype(tl_tree.LeafVector(0))>) { if (tl_tree.HasLeafVector(cur_node_id)) { cur_node.value.index = result.leaf_outputs.size(); result.leaf_outputs.push_back(tl_tree.LeafVector(cur_node_id)); @@ -103,13 +106,19 @@ auto convert_tree(treelite::Tree const& tl_tree, bo throw unconvertible_model_exception{"Leaf vector expected"}; } } else { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v< + typename tree_t::node_type::value_type, + typename tree_t::output_type>) { // Threshold and output values are the same type; store in same union // attribute cur_node.value.value = tl_tree.LeafValue(cur_node_id); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v< + typename tree_t::output_type, + typename tree_t::node_type:: + output_index_type>) { // Threshold and output value types are different, but output value - // type happens to be the same as index type; 
use index union attribute + // type happens to be the same as index type; use index union + // attribute cur_node.value.index = tl_tree.LeafValue(cur_node_id); } else { // Threshold and output value types are different; output value must @@ -133,24 +142,27 @@ auto convert_tree(treelite::Tree const& tl_tree, bo // Distant child is always less-than or in-category condition if (!categorical) { cur_node.value.value = tl_tree.Threshold(cur_node_id); - auto inclusive_threshold_node = ( - tl_operator == treelite::Operator::kLE || tl_operator == treelite::Operator::kGT - ); + auto inclusive_threshold_node = + (tl_operator == treelite::Operator::kLE || + tl_operator == treelite::Operator::kGT); if (!inclusive_threshold_node && use_inclusive_threshold) { - throw unconvertible_model_exception{"Inconsistent use of inclusive threshold"}; + throw unconvertible_model_exception{ + "Inconsistent use of inclusive threshold"}; } else { use_inclusive_threshold = inclusive_threshold_node; } - if ( - tl_operator == treelite::Operator::kLT || tl_operator == treelite::Operator::kLE) { + if (tl_operator == treelite::Operator::kLT || + tl_operator == treelite::Operator::kLE) { hot_child = right_id; distant_child = left_id; } else if ( - tl_operator == treelite::Operator::kGT || tl_operator == treelite::Operator::kGE) { + tl_operator == treelite::Operator::kGT || + tl_operator == treelite::Operator::kGE) { hot_child = left_id; distant_child = right_id; } else { - throw unconvertible_model_exception{"Unsupported comparison operator"}; + throw unconvertible_model_exception{ + "Unsupported comparison operator"}; } } else { if (tl_tree.CategoriesListRightChild(cur_node_id)) { @@ -161,11 +173,14 @@ auto convert_tree(treelite::Tree const& tl_tree, bo distant_child = left_id; } auto tl_categories = tl_tree.MatchingCategories(cur_node_id); - auto constexpr max_category = typename tree_t::node_type::category_set_type{}.size(); - cur_node.value.categories = typename tree_t::node_type::category_set_type{}; + auto constexpr max_category = + typename tree_t::node_type::category_set_type{}.size(); + cur_node.value.categories = + typename tree_t::node_type::category_set_type{}; for (auto category : tl_categories) { if (category >= max_category) { - throw unconvertible_model_exception{"Too many categories for categorical storage size"}; + throw unconvertible_model_exception{ + "Too many categories for categorical storage size"}; } cur_node.value.categories[category] = true; } @@ -181,61 +196,100 @@ auto convert_tree(treelite::Tree const& tl_tree, bo parent_stack.push(result.nodes.size() - 1); parent_stack.push(result.nodes.size() - 1); - } // End of handling for non-leaf nodes - } // node_stack is empty; DFS is done + } // End of handling for non-leaf nodes + } // node_stack is empty; DFS is done return result; -} // convert_tree +} // convert_tree using tl_dispatched_model = std::variant< - // value_type, feature_index_type, offset_type, output_index_type, output_type - simple_model, - simple_model>, - simple_model, - simple_model>, - - simple_model, - simple_model>, - simple_model, - simple_model>, - - simple_model, - simple_model>, - simple_model, - simple_model>, - - simple_model, - simple_model>, - simple_model, - simple_model>, - - simple_model, - simple_model>, - simple_model, - simple_model>, - - simple_model, - simple_model>, - simple_model, - simple_model>, - - simple_model, - simple_model>, - simple_model, - simple_model>, - - simple_model, - simple_model>, - simple_model, - simple_model> ->; - - -template -auto 
get_average_factor(treelite::ModelImpl const& tl_model) { + // value_type, feature_index_type, offset_type, output_index_type, + // output_type + simple_model< + float, std::uint16_t, std::uint16_t, std::uint32_t, std::uint32_t>, + simple_model< + float, std::uint16_t, std::uint16_t, std::uint32_t, + std::vector>, + simple_model, + simple_model< + float, std::uint16_t, std::uint16_t, std::uint32_t, std::vector>, + + simple_model< + float, std::uint16_t, std::uint32_t, std::uint32_t, std::uint32_t>, + simple_model< + float, std::uint16_t, std::uint32_t, std::uint32_t, + std::vector>, + simple_model, + simple_model< + float, std::uint16_t, std::uint32_t, std::uint32_t, std::vector>, + + simple_model< + float, std::uint32_t, std::uint16_t, std::uint32_t, std::uint32_t>, + simple_model< + float, std::uint32_t, std::uint16_t, std::uint32_t, + std::vector>, + simple_model, + simple_model< + float, std::uint32_t, std::uint16_t, std::uint32_t, std::vector>, + + simple_model< + float, std::uint32_t, std::uint32_t, std::uint32_t, std::uint32_t>, + simple_model< + float, std::uint32_t, std::uint32_t, std::uint32_t, + std::vector>, + simple_model, + simple_model< + float, std::uint32_t, std::uint32_t, std::uint32_t, std::vector>, + + simple_model< + double, std::uint16_t, std::uint16_t, std::uint32_t, std::uint32_t>, + simple_model< + double, std::uint16_t, std::uint16_t, std::uint32_t, + std::vector>, + simple_model, + simple_model< + double, std::uint16_t, std::uint16_t, std::uint32_t, + std::vector>, + + simple_model< + double, std::uint16_t, std::uint32_t, std::uint32_t, std::uint32_t>, + simple_model< + double, std::uint16_t, std::uint32_t, std::uint32_t, + std::vector>, + simple_model, + simple_model< + double, std::uint16_t, std::uint32_t, std::uint32_t, + std::vector>, + + simple_model< + double, std::uint32_t, std::uint16_t, std::uint32_t, std::uint32_t>, + simple_model< + double, std::uint32_t, std::uint16_t, std::uint32_t, + std::vector>, + simple_model, + simple_model< + double, std::uint32_t, std::uint16_t, std::uint32_t, + std::vector>, + + simple_model< + double, std::uint32_t, std::uint32_t, std::uint32_t, std::uint32_t>, + simple_model< + double, std::uint32_t, std::uint32_t, std::uint32_t, + std::vector>, + simple_model, + simple_model< + double, std::uint32_t, std::uint32_t, std::uint32_t, + std::vector>>; + + +template +auto +get_average_factor( + treelite::ModelImpl const& tl_model) +{ if (tl_model.average_tree_output) { if (tl_model.task_type == treelite::TaskType::kMultiClfGrovePerClass) { - return float(tl_model.trees.size() / tl_model.task_param.num_class); + return float(tl_model.trees.size() / tl_model.task_param.num_class); } else { return float(tl_model.trees.size()); } @@ -243,19 +297,26 @@ auto get_average_factor(treelite::ModelImpl const& return 1.0f; } -template -auto convert_dispatched_model(treelite::ModelImpl const& tl_model) { - using model_type = std::variant_alternative_t; +template < + std::size_t model_variant_index, typename tl_threshold_t, + typename tl_output_t> +auto +convert_dispatched_model( + treelite::ModelImpl const& tl_model) +{ + using model_type = + std::variant_alternative_t; auto result = model_type{}; result.use_inclusive_threshold = false; result.trees.reserve(tl_model.trees.size()); std::transform( - std::begin(tl_model.trees), - std::end(tl_model.trees), - std::back_inserter(result.trees), - [&result](auto&& tl_tree) { return convert_tree(tl_tree, result.use_inclusive_threshold, result.has_categorical_trees); } - ); + 
std::begin(tl_model.trees), std::end(tl_model.trees), + std::back_inserter(result.trees), [&result](auto&& tl_tree) { + return convert_tree( + tl_tree, result.use_inclusive_threshold, + result.has_categorical_trees); + }); result.num_class = tl_model.task_param.num_class; result.num_feature = tl_model.num_feature; @@ -266,8 +327,7 @@ auto convert_dispatched_model(treelite::ModelImpl c result.row_postproc = row_op::disable; auto tl_pred_transform = std::string{tl_model.param.pred_transform}; - if ( - tl_pred_transform == std::string{"identity"} || + if (tl_pred_transform == std::string{"identity"} || tl_pred_transform == std::string{"identity_multiclass"}) { result.set_element_postproc(element_op::disable); result.row_postproc = row_op::disable; @@ -293,41 +353,57 @@ auto convert_dispatched_model(treelite::ModelImpl c result.postproc_constant = tl_model.param.sigmoid_alpha; result.set_element_postproc(element_op::sigmoid); } else { - throw unconvertible_model_exception{"Unrecognized Treelite pred_transform string"}; + throw unconvertible_model_exception{ + "Unrecognized Treelite pred_transform string"}; } return result; } -template -auto convert_model( +template < + typename tl_threshold_t, typename tl_output_t, std::size_t variant_index> +auto +convert_model( treelite::ModelImpl const& tl_model, - std::size_t target_variant_index) { + std::size_t target_variant_index) +{ auto result = tl_dispatched_model{}; if constexpr (variant_index != std::variant_size_v) { if (variant_index == target_variant_index) { - using model_type = std::variant_alternative_t; + using model_type = + std::variant_alternative_t; if constexpr ( - std::is_same_v && - (std::is_same_v || - std::is_same_v, typename model_type::tree_type::output_type>)) { - result = convert_dispatched_model(tl_model); + std::is_same_v< + tl_threshold_t, + typename model_type::tree_type::node_type::value_type> && + (std::is_same_v< + tl_output_t, typename model_type::tree_type::output_type> || + std::is_same_v< + std::vector, + typename model_type::tree_type::output_type>)) { + result = convert_dispatched_model< + variant_index, tl_threshold_t, tl_output_t>(tl_model); } else { - throw unconvertible_model_exception("Unexpected TL types for this variant"); + throw unconvertible_model_exception( + "Unexpected TL types for this variant"); } } else { - result = convert_model(tl_model, target_variant_index); + result = convert_model( + tl_model, target_variant_index); } } return result; } template -auto convert_model(treelite::ModelImpl const& tl_model) { - - auto max_offset = std::accumulate(std::begin(tl_model.trees), std::end(tl_model.trees), int{}, [](auto&& prev_max, auto&& tree) { - return std::max(prev_max, tree.num_nodes); - }); +auto +convert_model(treelite::ModelImpl const& tl_model) +{ + auto max_offset = std::accumulate( + std::begin(tl_model.trees), std::end(tl_model.trees), int{}, + [](auto&& prev_max, auto&& tree) { + return std::max(prev_max, tree.num_nodes); + }); // TODO (wphicks): max_offset should be the min of the value calculated in // the above and 2**d + 1 where d is the max depth of any tree. For now, we // are just always using std::uint32_t for offset_t because using @@ -335,23 +411,22 @@ auto convert_model(treelite::ModelImpl const& tl_mo // padding. 
// https://github.com/triton-inference-server/fil_backend/issues/206 - auto constexpr large_threshold = std::size_t{std::is_same_v}; - auto const large_num_feature = std::size_t{tl_model.num_feature >= std::numeric_limits::max()}; - auto const large_max_offset = std::size_t{max_offset >= std::numeric_limits::max()}; - auto constexpr non_integer_output = std::size_t{!std::is_same_v}; - auto const has_vector_leaves = std::size_t{ - tl_model.task_param.leaf_vector_size > 1 - }; + auto constexpr large_threshold = + std::size_t{std::is_same_v}; + auto const large_num_feature = std::size_t{ + tl_model.num_feature >= std::numeric_limits::max()}; + auto const large_max_offset = + std::size_t{max_offset >= std::numeric_limits::max()}; + auto constexpr non_integer_output = + std::size_t{!std::is_same_v}; + auto const has_vector_leaves = + std::size_t{tl_model.task_param.leaf_vector_size > 1}; auto variant_index = std::size_t{ - (large_threshold << 4) + - (large_num_feature << 3) + - (large_max_offset << 2) + - (non_integer_output << 1) + - has_vector_leaves - }; + (large_threshold << 4) + (large_num_feature << 3) + + (large_max_offset << 2) + (non_integer_output << 1) + has_vector_leaves}; return convert_model(tl_model, variant_index); } -} +} // namespace herring diff --git a/src/herring/tree.hpp b/src/herring/tree.hpp index bdf25bec..db732489 100644 --- a/src/herring/tree.hpp +++ b/src/herring/tree.hpp @@ -17,191 +17,218 @@ #pragma once #include +#include #include #include -#include #include "herring/type_helpers.hpp" namespace herring { - /* A tree that can just return the stored value of nodes as its output */ - template - struct simple_tree { - using node_type = simple_node; - using output_type = output_t; - std::vector nodes; - std::vector default_distant; - std::vector categorical_node; - bool has_categorical_nodes; +/* A tree that can just return the stored value of nodes as its output */ +template < + typename value_t, typename feature_index_t, typename offset_t, + typename output_index_t, typename output_t> +struct simple_tree { + using node_type = + simple_node; + using output_type = output_t; + std::vector nodes; + std::vector default_distant; + std::vector categorical_node; + bool has_categorical_nodes; - auto get_leaf_value(node_type const& node) const { - if constexpr (std::is_same_v) { - return node.value.value; - } else { - static_assert(std::is_same_v); - return node.value.index; - } + auto get_leaf_value(node_type const& node) const + { + if constexpr (std::is_same_v) { + return node.value.value; + } else { + static_assert(std::is_same_v); + return node.value.index; } + } - auto get_leaf_value(std::size_t node_index) const { - return get_leaf_value(nodes[node_index]); - } + auto get_leaf_value(std::size_t node_index) const + { + return get_leaf_value(nodes[node_index]); + } - template - auto evaluate_tree_node(std::size_t node_index, float const* row) const { - auto result = offset_t{}; - if constexpr (categorical_model) { - if (!has_categorical_nodes) { - result = evaluate_tree_node_( - node_index, row - ); - } else { - result = evaluate_tree_node_( - node_index, row - ); - } + template < + bool missing_values_in_row, bool categorical_model, + bool inclusive_threshold> + auto evaluate_tree_node(std::size_t node_index, float const* row) const + { + auto result = offset_t{}; + if constexpr (categorical_model) { + if (!has_categorical_nodes) { + result = evaluate_tree_node_< + missing_values_in_row, false, inclusive_threshold>(node_index, row); } else { - result = 
evaluate_tree_node_( - node_index, row - ); + result = evaluate_tree_node_< + missing_values_in_row, true, inclusive_threshold>(node_index, row); } - return result; - }; + } else { + result = evaluate_tree_node_< + missing_values_in_row, false, inclusive_threshold>(node_index, row); + } + return result; + }; - private: - template - auto evaluate_tree_node_(std::size_t node_index, float const* row) const { - auto const& node = nodes[node_index]; - auto result = offset_t{}; - if constexpr(missing_values_in_row) { - auto feature_value = *(row + node.feature); - auto present = !std::isnan(feature_value); - if (present) { - if constexpr (categorical_tree) { - if (!categorical_node[node_index]) { - result = evaluate_node(node, feature_value); - } else { - result = evaluate_node(node, feature_value); - } + private: + template < + bool missing_values_in_row, bool categorical_tree, + bool inclusive_threshold> + auto evaluate_tree_node_(std::size_t node_index, float const* row) const + { + auto const& node = nodes[node_index]; + auto result = offset_t{}; + if constexpr (missing_values_in_row) { + auto feature_value = *(row + node.feature); + auto present = !std::isnan(feature_value); + if (present) { + if constexpr (categorical_tree) { + if (!categorical_node[node_index]) { + result = + evaluate_node(node, feature_value); } else { - result = evaluate_node(node, feature_value); + result = + evaluate_node(node, feature_value); } } else { - // This narrowing conversion is guaranteed safe because distant_offset - // cannot be 0 - // TODO(wphicks): Guarantee this with custom types - // (https://github.com/triton-inference-server/fil_backend/issues/204) + result = + evaluate_node(node, feature_value); + } + } else { + // This narrowing conversion is guaranteed safe because distant_offset + // cannot be 0 + // TODO(wphicks): Guarantee this with custom types + // (https://github.com/triton-inference-server/fil_backend/issues/204) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wnarrowing" - result = 1 + (node.distant_offset - 1) * default_distant[node_index]; + result = 1 + (node.distant_offset - 1) * default_distant[node_index]; #pragma GCC diagnostic pop - } - } else { - if constexpr (categorical_tree) { - if (!categorical_node[node_index]) { - result = evaluate_node(node, row); - } else { - result = evaluate_node(node, row); - } - } else { + } + } else { + if constexpr (categorical_tree) { + if (!categorical_node[node_index]) { result = evaluate_node(node, row); + } else { + result = evaluate_node(node, row); } + } else { + result = evaluate_node(node, row); } - return result; } - }; + return result; + } +}; - /* A tree that must look up its output values in separate storage */ - template - struct lookup_tree { - using node_type = simple_node; - using output_type = output_t; - std::vector nodes; - std::vector leaf_outputs; - std::vector default_distant; - std::vector categorical_node; - bool has_categorical_nodes; +/* A tree that must look up its output values in separate storage */ +template < + typename value_t, typename feature_index_t, typename offset_t, + typename output_index_t, typename output_t> +struct lookup_tree { + using node_type = + simple_node; + using output_type = output_t; + std::vector nodes; + std::vector leaf_outputs; + std::vector default_distant; + std::vector categorical_node; + bool has_categorical_nodes; - template < + template < typename tree_output_type = output_t, - std::enable_if_t::value, bool> = true> - auto const& get_leaf_value(node_type const& node) const { 
-  /* A tree that must look up its output values in separate storage */
-  template <typename value_t, typename feature_index_t, typename offset_t, typename output_index_t, typename output_t>
-  struct lookup_tree {
-    using node_type = simple_node<value_t, feature_index_t, offset_t, output_index_t>;
-    using output_type = output_t;
-    std::vector<node_type> nodes;
-    std::vector<output_type> leaf_outputs;
-    std::vector<bool> default_distant;
-    std::vector<bool> categorical_node;
-    bool has_categorical_nodes;
+/* A tree that must look up its output values in separate storage */
+template <
+    typename value_t, typename feature_index_t, typename offset_t,
+    typename output_index_t, typename output_t>
+struct lookup_tree {
+  using node_type =
+      simple_node<value_t, feature_index_t, offset_t, output_index_t>;
+  using output_type = output_t;
+  std::vector<node_type> nodes;
+  std::vector<output_type> leaf_outputs;
+  std::vector<bool> default_distant;
+  std::vector<bool> categorical_node;
+  bool has_categorical_nodes;

-    template <
+  template <
       typename tree_output_type = output_t,
-      std::enable_if_t<is_container_specialization<tree_output_type, std::vector>::value, bool> = true>
-    auto const& get_leaf_value(node_type const& node) const {
-      return leaf_outputs[node.value.index];
-    }
+      std::enable_if_t<
+          is_container_specialization<tree_output_type, std::vector>::value,
+          bool> = true>
+  auto const& get_leaf_value(node_type const& node) const
+  {
+    return leaf_outputs[node.value.index];
+  }

-    template <
+  template <
       typename tree_output_type = output_t,
-      std::enable_if_t<!is_container_specialization<tree_output_type, std::vector>::value, bool> = true>
-    auto get_leaf_value(node_type const& node) const {
-      return leaf_outputs[node.value.index];
-    }
+      std::enable_if_t<
+          !is_container_specialization<tree_output_type, std::vector>::value,
+          bool> = true>
+  auto get_leaf_value(node_type const& node) const
+  {
+    return leaf_outputs[node.value.index];
+  }

-    auto get_leaf_value(std::size_t node_id) const {
-      return leaf_outputs[nodes[node_id].value.index];
-    }
+  auto get_leaf_value(std::size_t node_id) const
+  {
+    return leaf_outputs[nodes[node_id].value.index];
+  }
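The get_leaf_value overloads above differ only in how the leaf payload is returned: when output_t is itself a std::vector specialization, the value is handed back by const reference to avoid copying a container on every lookup, while scalar outputs are returned by value. Selection hinges on the is_container_specialization trait whose definition is reformatted in the src/herring/type_helpers.hpp hunk further below. A self-contained C++17 sketch of the mechanism; is_specialization and leaf_store are illustrative stand-ins, not names from this codebase:

    #include <cstddef>
    #include <type_traits>
    #include <vector>

    // Same shape as herring::is_container_specialization: true_type only when
    // the first argument is a specialization of the given class template.
    template <typename T, template <typename...> class U>
    struct is_specialization : std::false_type {};

    template <template <typename...> class U, typename... Args>
    struct is_specialization<U<Args...>, U> : std::true_type {};

    static_assert(is_specialization<std::vector<float>, std::vector>::value, "");
    static_assert(!is_specialization<float, std::vector>::value, "");

    template <typename output_t>
    struct leaf_store {
      std::vector<output_t> outputs;

      // Container payloads: return by const reference so no copy is made.
      template <
          typename O = output_t,
          std::enable_if_t<is_specialization<O, std::vector>::value, bool> = true>
      auto const& get(std::size_t i) const
      {
        return outputs[i];
      }

      // Scalar payloads: returning by value is cheapest and safest.
      template <
          typename O = output_t,
          std::enable_if_t<!is_specialization<O, std::vector>::value, bool> = true>
      auto get(std::size_t i) const
      {
        return outputs[i];
      }
    };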
-    template <bool missing_values_in_row, bool categorical_model, bool inclusive_threshold>
-    auto evaluate_tree_node(std::size_t node_index, float const* row) const {
-      auto result = offset_t{};
-      if constexpr (categorical_model) {
-        if (!has_categorical_nodes) {
-          result = evaluate_tree_node_<missing_values_in_row, false, inclusive_threshold>(
-            node_index, row
-          );
-        } else {
-          result = evaluate_tree_node_<missing_values_in_row, true, inclusive_threshold>(
-            node_index, row
-          );
-        }
+  template <
+      bool missing_values_in_row, bool categorical_model,
+      bool inclusive_threshold>
+  auto evaluate_tree_node(std::size_t node_index, float const* row) const
+  {
+    auto result = offset_t{};
+    if constexpr (categorical_model) {
+      if (!has_categorical_nodes) {
+        result = evaluate_tree_node_<
+            missing_values_in_row, false, inclusive_threshold>(node_index, row);
       } else {
-        result = evaluate_tree_node_<missing_values_in_row, false, inclusive_threshold>(
-          node_index, row
-        );
+        result = evaluate_tree_node_<
+            missing_values_in_row, true, inclusive_threshold>(node_index, row);
       }
-      return result;
-    };
+    } else {
+      result = evaluate_tree_node_<
+          missing_values_in_row, false, inclusive_threshold>(node_index, row);
+    }
+    return result;
+  };

-   private:
-    template <bool missing_values_in_row, bool categorical_tree, bool inclusive_threshold>
-    auto evaluate_tree_node_(std::size_t node_index, float const* row) const {
-      auto const& node = nodes[node_index];
-      auto result = offset_t{};
-      if constexpr(missing_values_in_row) {
-        auto feature_value = *(row + node.feature);
-        auto present = !std::isnan(feature_value);
-        if (present) {
-          if constexpr (categorical_tree) {
-            if (!categorical_node[node_index]) {
-              result = evaluate_node<false, inclusive_threshold>(node, feature_value);
-            } else {
-              result = evaluate_node<true, inclusive_threshold>(node, feature_value);
-            }
+ private:
+  template <
+      bool missing_values_in_row, bool categorical_tree,
+      bool inclusive_threshold>
+  auto evaluate_tree_node_(std::size_t node_index, float const* row) const
+  {
+    auto const& node = nodes[node_index];
+    auto result = offset_t{};
+    if constexpr (missing_values_in_row) {
+      auto feature_value = *(row + node.feature);
+      auto present = !std::isnan(feature_value);
+      if (present) {
+        if constexpr (categorical_tree) {
+          if (!categorical_node[node_index]) {
+            result =
+                evaluate_node<false, inclusive_threshold>(node, feature_value);
           } else {
-            result = evaluate_node<false, inclusive_threshold>(node, feature_value);
+            result =
+                evaluate_node<true, inclusive_threshold>(node, feature_value);
           }
         } else {
-          // This narrowing conversion is guaranteed safe because distant_offset
-          // cannot be 0
-          // TODO(wphicks): Guarantee this with custom types
-          // (https://github.com/triton-inference-server/fil_backend/issues/204)
+          result =
+              evaluate_node<false, inclusive_threshold>(node, feature_value);
+        }
+      } else {
+        // This narrowing conversion is guaranteed safe because distant_offset
+        // cannot be 0
+        // TODO(wphicks): Guarantee this with custom types
+        // (https://github.com/triton-inference-server/fil_backend/issues/204)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wnarrowing"
-          result = 1 + (node.distant_offset - 1) * default_distant[node_index];
+        result = 1 + (node.distant_offset - 1) * default_distant[node_index];
 #pragma GCC diagnostic pop
-        }
-      } else {
-        if constexpr (categorical_tree) {
-          if (!categorical_node[node_index]) {
-            result = evaluate_node<false, inclusive_threshold>(node, row);
-          } else {
-            result = evaluate_node<true, inclusive_threshold>(node, row);
-          }
-        } else {
+      }
+    } else {
+      if constexpr (categorical_tree) {
+        if (!categorical_node[node_index]) {
           result = evaluate_node<false, inclusive_threshold>(node, row);
+        } else {
+          result = evaluate_node<true, inclusive_threshold>(node, row);
         }
+      } else {
+        result = evaluate_node<false, inclusive_threshold>(node, row);
       }
-      return result;
     }
-  };
-}
+    return result;
+  }
+};
+}  // namespace herring

diff --git a/src/herring/type_helpers.hpp b/src/herring/type_helpers.hpp
index 8259ec8b..082cb392 100644
--- a/src/herring/type_helpers.hpp
+++ b/src/herring/type_helpers.hpp
@@ -19,13 +19,13 @@
 #include <type_traits>

 namespace herring {
-  template <typename T, template <typename...> class U>
-  struct is_container_specialization : std::false_type {
-    using value_type = T;
-  };
+template <typename T, template <typename...> class U>
+struct is_container_specialization : std::false_type {
+  using value_type = T;
+};

-  template <template <typename...> class U, typename... Args>
-  struct is_container_specialization<U<Args...>, U>: std::true_type {
-    using value_type = typename U<Args...>::value_type;
-  };
-}
+template