Add GitHub action to format and lint code (#265)
* Set up pre-commit

* Apply pre-commit

* Make max-line-length 88

* Remove unnecessary line

* Run pre-install with updated config

* End of file newline

* Fix comment

* Remove unused variable

* Add and apply isort

* Newline at eof

* Remove duplicate copyrights, add hooks link

* Pin workflow Ubuntu version

* Remove unnecessary imports

* Remove unused import, update copyrights
dyastremsky authored Jun 27, 2023
1 parent 7865b03 commit 902df12
Showing 63 changed files with 1,358 additions and 1,039 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/codeql.yml
@@ -63,12 +63,12 @@ jobs:
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.

# Details on CodeQL's query packs refer to:
# https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
queries: +security-and-quality


# Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
@@ -77,7 +77,7 @@ jobs:
# Command-line programs to run using the OS shell.
# See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

# If the Autobuild fails above, remove it and uncomment the following three lines.
# If the Autobuild fails above, remove it and uncomment the following three lines.
# modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.

# - run: |
40 changes: 40 additions & 0 deletions .github/workflows/pre-commit.yml
@@ -0,0 +1,40 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: pre-commit

on:
  pull_request:
  push:
    branches: [main]

jobs:
  pre-commit:
    runs-on: ubuntu-22.04
    steps:
    - uses: actions/checkout@v3
    - uses: actions/setup-python@v3
    - uses: pre-commit/[email protected]
73 changes: 73 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,73 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

repos:
- repo: https://github.com/timothycrosley/isort
  rev: 5.12.0
  hooks:
  - id: isort
    additional_dependencies: [toml]
- repo: https://github.com/psf/black
  rev: 23.1.0
  hooks:
  - id: black
    types_or: [python, cython]
- repo: https://github.com/PyCQA/flake8
  rev: 5.0.4
  hooks:
  - id: flake8
    args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore = E203,E501]
    types_or: [python, cython]
- repo: https://github.com/pre-commit/mirrors-clang-format
  rev: v16.0.5
  hooks:
  - id: clang-format
    types_or: [c, c++, cuda, proto, textproto, java]
    args: ["-fallback-style=none", "-style=file", "-i"]
- repo: https://github.com/codespell-project/codespell
  rev: v2.2.4
  hooks:
  - id: codespell
    additional_dependencies: [tomli]
    args: ["--toml", "pyproject.toml"]
    exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
# More details about these pre-commit hooks here:
# https://pre-commit.com/hooks.html
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v4.4.0
  hooks:
  - id: check-case-conflict
  - id: check-executables-have-shebangs
  - id: check-merge-conflict
  - id: check-json
  - id: check-toml
  - id: check-yaml
  - id: check-shebang-scripts-are-executable
  - id: end-of-file-fixer
    types_or: [c, c++, cuda, proto, textproto, java, python]
  - id: mixed-line-ending
  - id: requirements-txt-fixer
  - id: trailing-whitespace
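
With this configuration merged, contributors can run the same checks locally before pushing, mirroring what the new pre-commit.yml workflow does in CI. A minimal sketch, assuming the `pre-commit` package has already been installed (for example via `pip install pre-commit`):

```python
# Hedged sketch: drive pre-commit from a small helper script. The CLI calls
# below ("pre-commit install", "pre-commit run --all-files") are standard
# pre-commit commands; the script itself is illustrative, not part of the repo.
import subprocess

# Register the git hook so the checks run automatically on every commit.
subprocess.run(["pre-commit", "install"], check=True)

# One-off run over the entire tree, e.g. right after adopting the config.
subprocess.run(["pre-commit", "run", "--all-files"], check=True)
```
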
6 changes: 3 additions & 3 deletions CMakeLists.txt
@@ -49,7 +49,7 @@ endif()
#
# Dependencies
#
# FetchContent's composibility isn't very good. We must include the
# FetchContent's composability isn't very good. We must include the
# transitive closure of all repos so that we can override the tag.
#
include(FetchContent)
@@ -249,7 +249,7 @@ target_link_libraries(
Threads::Threads
triton-backend-utils # from repo-backend
${CMAKE_DL_LIBS} # dlopen and dlclose
-lrt # shared memory
-lrt # shared memory
triton-core-serverstub # from repo-core
ZLIB::ZLIB
-larchive
@@ -263,7 +263,7 @@
triton-backend-utils # from repo-backend
${CMAKE_DL_LIBS} # dlopen and dlclose
pybind11::embed
-lrt # shared memory
-lrt # shared memory
-larchive # libarchive
)

36 changes: 18 additions & 18 deletions README.md
@@ -537,7 +537,7 @@ The decoupled mode is powerful and supports various other use cases:


The [decoupled examples](examples/decoupled/README.md) demonstrate
full power of what can be acheived from decoupled API. Read
full power of what can be achieved from decoupled API. Read
[Decoupled Backends and Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md)
for more details on how to host a decoupled model.
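
As a rough illustration of the decoupled API (a hedged sketch with placeholder tensor names, not code taken from the linked examples), a decoupled `execute` pushes responses through a response sender instead of returning them:

```python
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        for request in requests:
            # In decoupled mode, responses are delivered via the sender.
            sender = request.get_response_sender()
            in_tensor = pb_utils.get_input_tensor_by_name(request, "IN")

            # Send one (or many) responses for this request.
            out_tensor = pb_utils.Tensor("OUT", in_tensor.as_numpy())
            sender.send(pb_utils.InferenceResponse(output_tensors=[out_tensor]))

            # Mark the end of the response stream for this request.
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        # Decoupled models do not return responses from execute().
        return None
```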

@@ -586,15 +586,15 @@ documentation.
## Managing Python Runtime and Libraries

Python backend shipped in the [NVIDIA GPU Cloud](https://ngc.nvidia.com/)
containers uses Python 3.10. Python backend is able to use the libaries
containers uses Python 3.10. Python backend is able to use the libraries
that exist in the current Python environment. These libraries can
be installed in a virtualenv, conda environment, or the global system
Python. These libraries will only be used if the Python version matches
the Python version of the Python backend's stub executable. For example,
if you install a set of libraries in a Python 3.9 environment and your
Python backend stub is compiled with Python 3.10 these libraries will NOT
be available in your Python model served using Triton. You would need to
compile the stub executble with Python 3.9 using the instructions in
compile the stub executable with Python 3.9 using the instructions in
[Building Custom Python Backend Stub](#building-custom-python-backend-stub)
section.

@@ -849,7 +849,7 @@ will create additional threads instead of spawning separate processes.

## Running Multiple Instances of Triton Server

Python backend uses shared memory to transfer requests to the stub process.
Python backend uses shared memory to transfer requests to the stub process.
When running multiple instances of Triton Server on the same machine that use
Python models, there would be shared memory region name conflicts that can
result in segmentation faults or hangs. In order to avoid this issue, you need
@@ -1233,9 +1233,9 @@ class TritonPythonModel:
input0 = pb_utils.Tensor.from_dlpack("INPUT0", to_dlpack(pytorch_tensor))
```
Python backend allows tensors implementing
[`__dlpack__`](https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.array.__dlpack__.html)
and [`__dlpack_device__`](https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.array.__dlpack_device__.html)
[interface](https://dmlc.github.io/dlpack/latest/python_spec.html)
[`__dlpack__`](https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.array.__dlpack__.html)
and [`__dlpack_device__`](https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.array.__dlpack_device__.html)
[interface](https://dmlc.github.io/dlpack/latest/python_spec.html)
to be converted to Python backend tensors. For instance:

```python
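# Hedged sketch (placeholder values, not necessarily the snippet elided by
# this diff view): a PyTorch tensor implements __dlpack__/__dlpack_device__,
# so it can be handed to from_dlpack directly, without an explicit to_dlpack.
import torch

import triton_python_backend_utils as pb_utils

pytorch_tensor = torch.tensor([1.0, 2.0, 3.0])
input0 = pb_utils.Tensor.from_dlpack("INPUT0", pytorch_tensor)
```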
@@ -1275,8 +1275,8 @@ this workflow.
> **Note**
>
> Using a deep learning framework/package in a Python Backend model is
> not necessarily the same as using the corresponding Triton Backend
> implementation. For example, the
> not necessarily the same as using the corresponding Triton Backend
> implementation. For example, the
> [PyTorch Backend](https://github.com/triton-inference-server/pytorch_backend)
> is different from using a Python Backend model that uses `import torch`.
> If you are seeing significantly different results from a model executed by
@@ -1289,31 +1289,31 @@ this workflow.
For a simple example of using PyTorch in a Python Backend model, see the
[AddSubNet PyTorch example](#addsubnet-in-pytorch).

### PyTorch Determinism
### PyTorch Determinism

When running PyTorch code, you may notice slight differences in output values
across runs or across servers depending on hardware, system load, driver, or even
batch size. These differences are generally related to the selection of CUDA
kernels used to execute the operations, based on the factors mentioned.

For most intents and purposes, these differences aren't large enough to affect
a model's final prediction. However, to understand where these differences come
a model's final prediction. However, to understand where these differences come
from, see this [doc](https://pytorch.org/docs/stable/notes/randomness.html).

On Ampere devices and later, there is an optimization related to
FP32 operations called
FP32 operations called
[TensorFloat32 (TF32)](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/).
Typically this optimization will improve overall performance at the cost of
minor precision loss, but similarly this precision loss is acceptable for most
model predictions. For more info on TF32 in PyTorch and how to enable/disable
it as needed, see
it as needed, see
[here](https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices).
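
As a concrete illustration (a hedged sketch, not code shipped with this backend), TF32 can be turned off from inside a Python model before any PyTorch work is done:

```python
import torch

# Trade a little Ampere throughput for full-precision, more reproducible FP32.
torch.backends.cuda.matmul.allow_tf32 = False  # matrix-multiply kernels
torch.backends.cudnn.allow_tf32 = False  # cuDNN convolution kernels
```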

## TensorFlow

### TensorFlow Determinism

Similar to the PyTorch determinism section above, TensorFlow can have slight
Similar to the PyTorch determinism section above, TensorFlow can have slight
differences in outputs based on various factors like hardware, system
configurations, or batch sizes due to the library's internal CUDA kernel
selection process. For more information on improving the determinism of outputs
@@ -1429,18 +1429,18 @@ You can find the complete example instructions in

## Model Instance Kind

Triton model configuration allows users to provide kind to [instance group
Triton model configuration allows users to provide kind to [instance group
settings.](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#instance-groups)
A python backend model can be written to respect the kind setting to control
A python backend model can be written to respect the kind setting to control
the execution of a model instance either on CPU or GPU.

In the [model instance kind example](examples/instance_kind/README.md)
In the [model instance kind example](examples/instance_kind/README.md)
we demonstrate how this can be achieved for your python model.
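
As a rough sketch of the mechanism (the linked example is the complete version), `initialize` receives the configured kind and device id as strings in its `args` dictionary, and the model can choose its device from them; the tiny PyTorch model below is only a placeholder:

```python
import torch


class TritonPythonModel:
    def initialize(self, args):
        # "model_instance_kind" is "GPU" or "CPU"; "model_instance_device_id"
        # is the GPU index assigned to this instance (as a string).
        if args["model_instance_kind"] == "GPU":
            self.device = "cuda:" + args["model_instance_device_id"]
        else:
            self.device = "cpu"
        self.model = torch.nn.Linear(4, 4).to(self.device)
```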

## Auto-complete config

The auto-complete config example demonstrates how to use the
`auto_complete_config` function to define
`auto_complete_config` function to define
[minimal model configuration](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#minimal-model-configuration)
when a configuration file is not available. You can find the complete example
instructions in [examples/auto_complete](examples/auto_complete/README.md).
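
For reference, a minimal sketch of such a function (tensor names, types, and shapes here are placeholders rather than the example's actual values):

```python
class TritonPythonModel:
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        # Describe the model so Triton can serve it without a config.pbtxt.
        auto_complete_model_config.set_max_batch_size(0)
        auto_complete_model_config.add_input(
            {"name": "INPUT0", "data_type": "TYPE_FP32", "dims": [4]}
        )
        auto_complete_model_config.add_output(
            {"name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [4]}
        )
        return auto_complete_model_config
```
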
2 changes: 1 addition & 1 deletion cmake/TritonPythonBackendConfig.cmake.in
@@ -1,4 +1,4 @@

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
35 changes: 20 additions & 15 deletions examples/add_sub/client.py
@@ -24,11 +24,11 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from tritonclient.utils import *
import tritonclient.http as httpclient
import sys

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import *

model_name = "add_sub"
shape = [4]
@@ -37,10 +37,12 @@
input0_data = np.random.rand(*shape).astype(np.float32)
input1_data = np.random.rand(*shape).astype(np.float32)
inputs = [
httpclient.InferInput("INPUT0", input0_data.shape,
np_to_triton_dtype(input0_data.dtype)),
httpclient.InferInput("INPUT1", input1_data.shape,
np_to_triton_dtype(input1_data.dtype)),
httpclient.InferInput(
"INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)
),
httpclient.InferInput(
"INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)
),
]

inputs[0].set_data_from_numpy(input0_data)
@@ -51,19 +53,22 @@
httpclient.InferRequestedOutput("OUTPUT1"),
]

response = client.infer(model_name,
                        inputs,
                        request_id=str(1),
                        outputs=outputs)
response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)

result = response.get_response()
output0_data = response.as_numpy("OUTPUT0")
output1_data = response.as_numpy("OUTPUT1")

print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
input0_data, input1_data, output0_data))
print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT0 ({})".format(
input0_data, input1_data, output1_data))
print(
"INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
input0_data, input1_data, output0_data
)
)
print(
"INPUT0 ({}) - INPUT1 ({}) = OUTPUT0 ({})".format(
input0_data, input1_data, output1_data
)
)

if not np.allclose(input0_data + input1_data, output0_data):
print("add_sub example error: incorrect sum")
@@ -73,5 +78,5 @@
print("add_sub example error: incorrect difference")
sys.exit(1)

print('PASS: add_sub')
print("PASS: add_sub")
sys.exit(0)