From 3744b71bdc414851b5e6b98f2743b01b8401fa01 Mon Sep 17 00:00:00 2001 From: Lin Manhui Date: Fri, 8 Nov 2024 18:13:31 +0800 Subject: [PATCH] [Docs] Update deployment documentation (#2435) * Update * Refine docs * Add version control * basic inference->quick inference * Remove deprecated docs * Remove deprecated docs * Remove tutorial list in serving docs --- .../high_performance_inference.en.md | 98 ++++++++++------- .../high_performance_inference.md | 100 +++++++++++------- docs/pipeline_deploy/service_deploy.en.md | 78 +------------- docs/pipeline_deploy/service_deploy.md | 81 +------------- .../cv_pipelines/image_anomaly_detection.md | 14 +-- ...ocument_scene_information_extraction.en.md | 69 +++++++----- .../document_scene_information_extraction.md | 65 +++++++----- .../tutorials/ocr_pipelines/layout_parsing.md | 2 + .../serving/_pipeline_apps/ppchatocrv3.py | 54 +++++----- 9 files changed, 249 insertions(+), 312 deletions(-) diff --git a/docs/pipeline_deploy/high_performance_inference.en.md b/docs/pipeline_deploy/high_performance_inference.en.md index 6e34aecf8b..cf00d59fc5 100644 --- a/docs/pipeline_deploy/high_performance_inference.en.md +++ b/docs/pipeline_deploy/high_performance_inference.en.md @@ -8,11 +8,11 @@ In real-world production environments, many applications have stringent standard ## 1. Installation and Usage of High-Performance Inference Plugins -Before using the high-performance inference plugins, ensure you have completed the installation of PaddleX according to the [PaddleX Local Installation Tutorial](../installation/installation.en.md), and have successfully run the basic inference of the pipeline using either the PaddleX pipeline command line instructions or the Python script instructions. +Before using the high-performance inference plugins, ensure you have completed the installation of PaddleX according to the [PaddleX Local Installation Tutorial](../installation/installation.en.md), and have successfully run the quick inference of the pipeline using either the PaddleX pipeline command line instructions or the Python script instructions. ### 1.1 Installing High-Performance Inference Plugins -Find the corresponding installation command based on your processor architecture, operating system, device type, and Python version in the table below and execute it in your deployment environment: +Find the corresponding installation command based on your processor architecture, operating system, device type, and Python version in the table below and execute it in your deployment environment. Please replace `{paddlex version number}` with the actual paddlex version number, such as the current latest stable version `3.0.0b2`. If you need to use the version corresponding to the development branch, replace `{paddlex version number}` with `0.0.0.dev0`. @@ -29,33 +29,33 @@ Find the corresponding installation command based on your processor architecture - + - + - + - + - + - +
3.8curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device cpu --py 38curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device cpu --py 38
3.9curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device cpu --py 39curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device cpu --py 39
3.10curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310
GPU (CUDA 11.8 + cuDNN 8.6) 3.8curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 38curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 38
3.9curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 39curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 39
3.10curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 310curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex version number}/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 310
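For instance, assuming a Linux x86_64 machine, CPU-only inference, and Python 3.10, substituting the stable release `3.0.0b2` for `{paddlex version number}` in the corresponding table row gives the following command (shown purely to illustrate the placeholder substitution; pick the row that matches your own environment):

```bash
curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/3.0.0b2/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310
```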
-* When the device type is GPU, please use the installation instructions corresponding to the CUDA and cuDNN versions that match your environment. Otherwise, you will not be able to use the high-performance inference plugin properly. * For Linux systems, execute the installation instructions using Bash. +* When using NVIDIA GPUs, please use the installation instructions corresponding to the CUDA and cuDNN versions that match your environment. Otherwise, you will not be able to use the high-performance inference plugin properly. * When the device type is CPU, the installed high-performance inference plugin only supports inference using the CPU; for other device types, the installed high-performance inference plugin supports inference using the CPU or other devices. ### 1.2 Obtaining Serial Numbers and Activation @@ -77,37 +77,37 @@ Please note: Each serial number can only be bound to a unique device fingerprint ### 1.3 Enabling High-Performance Inference Plugins -Before enabling high-performance plugins, please ensure that the `LD_LIBRARY_PATH` of the current environment does not specify the TensorRT directory, as the plugins already integrate TensorRT to avoid conflicts caused by different TensorRT versions that may prevent the plugins from functioning properly. +For Linux systems, if using the high-performance inference plugin in a Docker container, please mount the host machine's `/dev/disk/by-uuid` and `${HOME}/.baidu/paddlex/licenses` directories to the container. For PaddleX CLI, specify `--use_hpip` and set the serial number to enable the high-performance inference plugin. If you wish to activate the license online, specify `--update_license` when using the serial number for the first time. Taking the general image classification pipeline as an example: -```diff +```bash paddlex \ --pipeline image_classification \ --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg \ --device gpu:0 \ -+ --use_hpip \ -+ --serial_number {serial_number} + --use_hpip \ + --serial_number {serial_number} -# If you wish to activate the license online +# If you wish to perform online activation paddlex \ --pipeline image_classification \ --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg \ --device gpu:0 \ -+ --use_hpip \ -+ --serial_number {serial_number} \ -+ --update_license + --use_hpip \ + --serial_number {serial_number} \ + --update_license ``` For PaddleX Python API, enabling the high-performance inference plugin is similar. Still taking the general image classification pipeline as an example: -```diff +```python from paddlex import create_pipeline pipeline = create_pipeline( pipeline="image_classification", -+ use_hpip=True, -+ serial_number="{serial_number}", + use_hpip=True, + hpi_params={"serial_number": "{serial_number}"}, ) output = pipeline.predict("https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg") @@ -117,35 +117,61 @@ The inference results obtained with the high-performance inference plugin enable ### 1.4 Modifying High-Performance Inference Configurations -PaddleX provides default high-performance inference configurations for each model and stores them in the model's configuration file. Due to the diversity of actual deployment environments, using the default configurations may not achieve ideal performance in specific environments or may even result in inference failures. 
For situations where the default configurations cannot meet requirements, you can try changing the model's inference backend as follows: +PaddleX combines model information and runtime environment information to provide default high-performance inference configurations for each model. These default configurations are carefully prepared to be applicable in several common scenarios and achieve relatively optimal performance. Therefore, users typically may not need to be concerned with the specific details of these configurations. However, due to the diversity of actual deployment environments and requirements, the default configuration may not yield ideal performance in certain scenarios and could even result in inference failures. In cases where the default configuration does not meet the requirements, users can manually adjust the configuration by modifying the Hpi field in the inference.yml file within the model directory (if this field does not exist, it needs to be added). The following are two common situations: -1. Locate the `inference.yml` file in the model directory and find the `Hpi` field. +- Switching inference backends: -2. Modify the value of `selected_backends`. Specifically, `selected_backends` may be set as follows: + When the default inference backend is not available, the inference backend needs to be switched manually. Users should modify the `selected_backends` field (if it does not exist, it needs to be added). ```yaml - selected_backends: + Hpi: + ... + selected_backends: cpu: paddle_infer gpu: onnx_runtime + ... ``` - Each entry is formatted as `{device_type}: {inference_backend_name}`. The default selects the backend with the shortest inference time in the official test environment. `supported_backends` lists the inference backends supported by the model in the official test environment for reference. + Each entry should follow the format `{device type}: {inference backend name}`. The currently available inference backends are: - * `paddle_infer`: The standard Paddle Inference engine. Supports CPU and GPU. - * `paddle_tensorrt`: [Paddle-TensorRT](https://www.paddlepaddle.org.cn/lite/v2.10/optimize/paddle_trt.html), a high-performance deep learning inference library produced by Paddle, which integrates TensorRT in the form of subgraphs for further optimization and acceleration. Supports GPU only. - * `openvino`: [OpenVINO](https://github.com/openvinotoolkit/openvino), a deep learning inference tool provided by Intel, optimized for model inference performance on various Intel hardware. Supports CPU only. - * `onnx_runtime`: [ONNX Runtime](https://onnxruntime.ai/), a cross-platform, high-performance inference engine. Supports CPU and GPU. - * `tensorrt`: [TensorRT](https://developer.nvidia.com/tensorrt), a high-performance deep learning inference library provided by NVIDIA, optimized for NVIDIA GPUs to improve speed. Supports GPU only. + * `paddle_infer`: The Paddle Inference engine. Supports CPU and GPU. Compared to the PaddleX quick inference, TensorRT subgraphs can be integrated to enhance inference performance on GPUs. + * `openvino`: [OpenVINO](https://github.com/openvinotoolkit/openvino), a deep learning inference tool provided by Intel, optimized for model inference performance on various Intel hardware. Supports CPU only. The high-performance inference plugin automatically converts the model to the ONNX format and uses this engine for inference. + * `onnx_runtime`: [ONNX Runtime](https://onnxruntime.ai/), a cross-platform, high-performance inference engine. 
Supports CPU and GPU. The high-performance inference plugin automatically converts the model to the ONNX format and uses this engine for inference. + * `tensorrt`: [TensorRT](https://developer.nvidia.com/tensorrt), a high-performance deep learning inference library provided by NVIDIA, optimized for NVIDIA GPUs to improve speed. Supports GPU only. The high-performance inference plugin automatically converts the model to the ONNX format and uses this engine for inference. - Here are some key details of the current official test environment: +- Modifying dynamic shape configurations for Paddle Inference or TensorRT: - * CPU: Intel Xeon Gold 5117 - * GPU: NVIDIA Tesla T4 - * CUDA Version: 11.8 - * cuDNN Version: 8.6 - * Docker:registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82 + Dynamic shape is the ability of TensorRT to defer specifying parts or all of a tensor’s dimensions until runtime. If the default dynamic shape configuration does not meet requirements (e.g., the model may require input shapes beyond the default range), users need to modify the `trt_dynamic_shapes` or `dynamic_shapes` field in the inference backend configuration: + + ```yaml + Hpi: + ... + backend_configs: + # Configuration for the Paddle Inference backend + paddle_infer: + ... + trt_dynamic_shapes: + x: + - [1, 3, 300, 300] + - [4, 3, 300, 300] + - [32, 3, 1200, 1200] + ... + # Configuration for the TensorRT backend + tensorrt: + ... + dynamic_shapes: + x: + - [1, 3, 300, 300] + - [4, 3, 300, 300] + - [32, 3, 1200, 1200] + ... + ``` + + In `trt_dynamic_shapes` or `dynamic_shapes`, each input tensor requires a specified dynamic shape in the format: `{input tensor name}: [{minimum shape}, [{optimal shape}], [{maximum shape}]]`. For details on minimum, optimal, and maximum shapes and further information, please refer to the official TensorRT documentation. + + After completing the modifications, please delete the cache files in the model directory (`shape_range_info.pbtxt` and files starting with `trt_serialized`). ## 2. Pipelines and Models Supporting High-Performance Inference Plugins diff --git a/docs/pipeline_deploy/high_performance_inference.md b/docs/pipeline_deploy/high_performance_inference.md index 6390da117b..92b7a59ca5 100644 --- a/docs/pipeline_deploy/high_performance_inference.md +++ b/docs/pipeline_deploy/high_performance_inference.md @@ -8,11 +8,11 @@ comments: true ## 1.高性能推理插件的安装与使用 -使用高性能推理插件前,请确保您已经按照[PaddleX本地安装教程](../installation/installation.md) 完成了PaddleX的安装,且按照PaddleX产线命令行使用说明或PaddleX产线Python脚本使用说明跑通了产线的基本推理。 +使用高性能推理插件前,请确保您已经按照[PaddleX本地安装教程](../installation/installation.md) 完成了PaddleX的安装,且按照PaddleX产线命令行使用说明或PaddleX产线Python脚本使用说明跑通了产线的快速推理。 ### 1.1 安装高性能推理插件 -在下表中根据处理器架构、操作系统、设备类型、Python 版本等信息,找到对应的安装指令并在部署环境中执行: +在下表中根据处理器架构、操作系统、设备类型、Python 版本等信息,找到对应的安装指令并在部署环境中执行。请将 `{paddlex 版本号}` 替换为实际的 paddlex 的版本号,例如当前最新的稳定版本 `3.0.0b2`。如果需要使用开发分支对应的版本,请将 `{paddlex 版本号}` 替换为 `0.0.0.dev0`。 @@ -29,33 +29,33 @@ comments: true - + - + - + - + - + - +
3.8curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device cpu --py 38curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device cpu --py 38
3.9curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device cpu --py 39curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device cpu --py 39
3.10curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device cpu --py 310
GPU (CUDA 11.8 + cuDNN 8.6) 3.8curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 38curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.8 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 38
3.9curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 39curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.9 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 39
3.10curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/latest/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 310curl -s https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/paddlex_hpi/install_script/{paddlex 版本号}/install_paddlex_hpi.py | python3.10 - --arch x86_64 --os linux --device gpu_cuda118_cudnn86 --py 310
-* 当设备类型为 GPU 时,请使用与环境匹配的 CUDA 和 cuDNN 版本对应的安装指令,否则,将无法正常使用高性能推理插件。 * 对于 Linux 系统,使用 Bash 执行安装指令。 +* 当使用 NVIDIA GPU 时,请使用与环境匹配的 CUDA 和 cuDNN 版本对应的安装指令,否则,将无法正常使用高性能推理插件。 * 当设备类型为 CPU 时,安装的高性能推理插件仅支持使用 CPU 进行推理;对于其他设备类型,安装的高性能推理插件则支持使用 CPU 或其他设备进行推理。 ### 1.2 获取序列号与激活 @@ -76,37 +76,37 @@ comments: true ### 1.3 启用高性能推理插件 -在启用高性能插件前,请确保当前环境的 `LD_LIBRARY_PATH` 没有指定 TensorRT 的共享库目录,因为插件中已经集成了 TensorRT,避免 TensorRT 版本冲突导致插件无法正常使用。 +对于 Linux 系统,如果在 Docker 容器中使用高性能推理插件,请为容器挂载宿主机的 `/dev/disk/by-uuid` 与 `${HOME}/.baidu/paddlex/licenses` 目录。 对于 PaddleX CLI,指定 `--use_hpip`,并设置序列号,即可启用高性能推理插件。如果希望进行联网激活,在第一次使用序列号时,需指定 `--update_license`,以通用图像分类产线为例: -```diff +```bash paddlex \ --pipeline image_classification \ --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg \ --device gpu:0 \ -+ --use_hpip \ -+ --serial_number {序列号} + --use_hpip \ + --serial_number {序列号} # 如果希望进行联网激活 paddlex \ --pipeline image_classification \ --input https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg \ --device gpu:0 \ -+ --use_hpip \ -+ --serial_number {序列号} -+ --update_license + --use_hpip \ + --serial_number {序列号} + --update_license ``` 对于 PaddleX Python API,启用高性能推理插件的方法类似。仍以通用图像分类产线为例: -```diff +```python from paddlex import create_pipeline pipeline = create_pipeline( pipeline="image_classification", -+ use_hpip=True, -+ serial_number="{序列号}", + use_hpip=True, + hpi_params={"serial_number": "{序列号}"}, ) output = pipeline.predict("https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_image_classification_001.jpg") @@ -116,33 +116,61 @@ output = pipeline.predict("https://paddle-model-ecology.bj.bcebos.com/paddlex/im ### 1.4 修改高性能推理配置 -PaddleX 为每个模型提供默认的高性能推理配置,并将其存储在模型的配置文件中。由于实际部署环境的多样性,使用默认配置可能无法在特定环境中获取理想的性能,甚至可能出现推理失败的情况。对于默认配置无法满足要求的情形,可以通过如下方式,尝试更换模型的推理后端: +PaddleX 结合模型信息与运行环境信息为每个模型提供默认的高性能推理配置。这些默认配置经过精心准备,以便在数个常见场景中可用,且能够取得较优的性能。因此,通常用户可能并不用关心如何这些配置的具体细节。然而,由于实际部署环境与需求的多样性,使用默认配置可能无法在特定场景获取理想的性能,甚至可能出现推理失败的情况。对于默认配置无法满足要求的情形,用户可以通过修改模型目录中 `inference.yml` 文件中 `Hpi` 字段(如果该字段不存在,需要新增)的方式,手动调整配置。以下列举两种常见的情形: -1. 找到模型目录中的 `inference.yml` 文件,定位到其中的 `Hpi` 字段; -2. 修改 `selected_backends` 的值。具体而言,`selected_backends` 可能被设置如下: +- 更换推理后端: - ``` - selected_backends: + 当默认的推理后端不可用时,需要手动更换推理后端。用户需要修改 `selected_backends` 字段(如果不存在,需要新增)。 + + ```yaml + Hpi: + ... + selected_backends: cpu: paddle_infer gpu: onnx_runtime + ... 
``` - 其中每一项均按照 `{设备类型}: {推理后端名称}` 的格式填写,默认选用在官方测试环境中推理耗时最短的后端。`supported_backends` 中记录了官方测试环境中模型支持的推理后端,可供参考。 + 其中每一项均按照 `{设备类型}: {推理后端名称}` 的格式填写。 + 目前所有可选的推理后端如下: - * `paddle_infer`:标准的 Paddle Inference 推理引擎。支持 CPU 和 GPU。 - * `paddle_tensorrt`:[Paddle-TensorRT](https://www.paddlepaddle.org.cn/lite/v2.10/optimize/paddle_trt.html),Paddle 官方出品的高性能深度学习推理库,采用子图的形式对 TensorRT 进行了集成,以实现进一步优化加速。仅支持 GPU。 - * `openvino`:[OpenVINO](https://github.com/openvinotoolkit/openvino),Intel 提供的深度学习推理工具,优化了多种 Intel 硬件上的模型推理性能。仅支持 CPU。 - * `onnx_runtime`:[ONNX Runtime](https://onnxruntime.ai/),跨平台、高性能的推理引擎。支持 CPU 和 GPU。 - * `tensorrt`:[TensorRT](https://developer.nvidia.com/tensorrt),NVIDIA 提供的高性能深度学习推理库,针对 NVIDIA GPU 进行优化以提升速度。仅支持 GPU。 + * `paddle_infer`:Paddle Inference 推理引擎。支持 CPU 和 GPU。相比 PaddleX 快速推理,高性能推理插件支持以集成 TensorRT 子图的方式提升模型的 GPU 推理性能。 + * `openvino`:[OpenVINO](https://github.com/openvinotoolkit/openvino),Intel 提供的深度学习推理工具,优化了多种 Intel 硬件上的模型推理性能。仅支持 CPU。高性能推理插件自动将模型转换为 ONNX 格式后用该引擎推理。 + * `onnx_runtime`:[ONNX Runtime](https://onnxruntime.ai/),跨平台、高性能的推理引擎。支持 CPU 和 GPU。高性能推理插件自动将模型转换为 ONNX 格式后用该引擎推理。 + * `tensorrt`:[TensorRT](https://developer.nvidia.com/tensorrt),NVIDIA 提供的高性能深度学习推理库,针对 NVIDIA GPU 进行优化以提升速度。仅支持 GPU。高性能推理插件自动将模型转换为 ONNX 格式后用该引擎推理。 + +- 修改 Paddle Inference 或 TensorRT 的动态形状配置: + + 动态形状是 TensorRT 延迟指定部分或全部张量维度直到运行时的能力。当默认的动态形状配置无法满足需求(例如,模型可能需要范围外的输入形状),用户需要修改状推理后端配置中的 `trt_dynamic_shapes` 或 `dynamic_shapes` 字段: + + ```yaml + Hpi: + ... + backend_configs: + # Paddle Inference 后端配置 + paddle_infer: + ... + trt_dynamic_shapes: + x: + - [1, 3, 300, 300] + - [4, 3, 300, 300] + - [32, 3, 1200, 1200] + ... + # TensorRT 后端配置 + tensorrt: + ... + dynamic_shapes: + x: + - [1, 3, 300, 300] + - [4, 3, 300, 300] + - [32, 3, 1200, 1200] + ... + ``` - 以下是目前的官方测试环境的部分关键信息: + 在 `trt_dynamic_shapes` 或 `dynamic_shapes` 中,需要为每一个输入张量指定动态形状,格式为:`{输入张量名称}: [{最小形状}, [{最优形状}], [{最大形状}]]`。有关最小形状、最优形状以及最大形状的相关介绍及更多细节,请参考 TensorRT 官方文档。 - * CPU:Intel Xeon Gold 5117 - * GPU:NVIDIA Tesla T4 - * CUDA版本:11.8 - * cuDNN版本:8.6 - * Docker 镜像:registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.8-cudnn8.6-trt8.5-gcc82 + 在完成修改后,请删除模型目录中的缓存文件(`shape_range_info.pbtxt` 与 `trt_serialized` 开头的文件)。 ## 2、支持使用高性能推理插件的产线与模型 diff --git a/docs/pipeline_deploy/service_deploy.en.md b/docs/pipeline_deploy/service_deploy.en.md index 2da3f6329d..afc27ae68a 100644 --- a/docs/pipeline_deploy/service_deploy.en.md +++ b/docs/pipeline_deploy/service_deploy.en.md @@ -46,7 +46,7 @@ INFO: Application startup complete. INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) ``` -`--pipeline` can be specified as an official pipeline name or the path to a local pipeline configuration file. PaddleX uses this to build the pipeline and deploy it as a service. To adjust configurations (such as model path, batch_size, deployment device), please refer to the "Model Application" section in the [General Image Classification Pipeline Tutorial](../pipeline_usage/tutorials/cv_pipelines/image_classification.en.md) (for other pipelines, refer to the corresponding tutorials in the "1.3 Calling the Service" table). +`--pipeline` can be specified as an official pipeline name or the path to a local pipeline configuration file. PaddleX uses this to build the pipeline and deploy it as a service. To adjust configurations (such as model path, batch_size, deployment device), please refer to the "Model Application" section in the [General Image Classification Pipeline Tutorial](../pipeline_usage/tutorials/cv_pipelines/image_classification.en.md). 
Command-line options related to serving deployment are as follows: @@ -92,82 +92,8 @@ Command-line options related to serving deployment are as follows: ### 1.3 Call the Service -Please refer to the "Development Integration/Deployment" section in the usage tutorials for each pipeline. +Please refer to the "Development Integration/Deployment" section in the usage tutorials for each pipeline. You can find the pipeline tutorials [here](../pipeline_usage/pipeline_develop_guide.en.md). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Model PipelinesUsage Tutorials
General Image Classification PipelineTutorial for Using the General Image Classification Pipeline
General Object Detection PipelineTutorial for Using the General Object Detection Pipeline
General Semantic Segmentation PipelineTutorial for Using the General Semantic Segmentation Pipeline
General Instance Segmentation PipelineTutorial for Using the General Instance Segmentation Pipeline
General Image Multi-Label Classification PipelineTutorial for Using the General Image Multi-Label Classification Pipeline
Small Object Detection PipelineTutorial for Using the Small Object Detection Pipeline
Image Anomaly Detection PipelineTutorial for Using the Image Anomaly Detection Pipeline
General OCR PipelineTutorial for Using the General OCR Pipeline
General Table Recognition PipelineTutorial for Using the General Table Recognition Pipeline
General Layout Parsing PipelineTutorial for Using the Layout Parsing Recognition Pipeline
Formula Recognition PipelineTutorial for Using the Formula Recognition Pipeline
Seal Text Recognition PipelineTutorial for Using the Seal Text Recognition Pipeline
Time Series Forecasting PipelineTutorial for Using the Time Series Forecasting Pipeline
Time Series Anomaly Detection PipelineTutorial for Using the Time Series Anomaly Detection Pipeline
Time Series Classification PipelineTutorial for Using the Time Series Classification Pipeline
Document Scene Information Extraction v3 PipelineTutorial for Using the Document Scene Information Extraction v3 Pipeline
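Whichever pipeline you deploy, the calling pattern is the same: encode or reference the input, POST a JSON body to the pipeline's endpoint, and read the `result` field of the JSON response. The minimal sketch below follows the same pattern as the client examples that appear later in this patch for the image anomaly detection service, here assuming a service started with `paddlex --serve --pipeline image_classification`; the endpoint path and request/response field names are assumptions for illustration, so consult the tutorial of the pipeline you actually deploy for its exact schema.

```python
import base64
import requests

# Assumes a locally running service started with:
#   paddlex --serve --pipeline image_classification
API_URL = "http://localhost:8080/image-classification"  # endpoint path is pipeline-specific
image_path = "./demo.jpg"

# Base64-encode the local image, as the serving API expects for file inputs
with open(image_path, "rb") as file:
    image_data = base64.b64encode(file.read()).decode("ascii")

payload = {"image": image_data}  # request field names vary by pipeline

response = requests.post(API_URL, json=payload)
response.raise_for_status()

# The pipeline-specific payload is returned under the "result" key
result = response.json()["result"]
print(result)
```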
## 2. Deploy Services for Production When deploying services into production environments, the stability, efficiency, and security of the services are of paramount importance. Below are some recommendations for deploying services into production. diff --git a/docs/pipeline_deploy/service_deploy.md b/docs/pipeline_deploy/service_deploy.md index 1a3526ac83..eaf6017582 100644 --- a/docs/pipeline_deploy/service_deploy.md +++ b/docs/pipeline_deploy/service_deploy.md @@ -37,7 +37,7 @@ paddlex --serve --pipeline {产线名称或产线配置文件路径} [{其他命 paddlex --serve --pipeline image_classifcation ``` -服务启动成功后,可以看到类似如下展示的信息: +服务启动成功后,可以看到类似以下展示的信息: ``` INFO: Started server process [63108] @@ -46,7 +46,8 @@ INFO: Application startup complete. INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) ``` ---pipeline可指定为官方产线名称或本地产线配置文件路径。PaddleX 以此构建产线并部署为服务。如需调整配置(如模型路径、batch_size、部署设备等),请参考[通用图像分类产线使用教程](../pipeline_usage/tutorials/cv_pipelines/image_classification.md)中的 “模型应用” 部分(对于其他产线,可参考 “1.3 调用服务” 表格中的对应教程)。 +--pipeline可指定为官方产线名称或本地产线配置文件路径。PaddleX 以此构建产线并部署为服务。如需调整配置(如模型路径、batch_size、部署设备等),请参考[通用图像分类产线使用教程](../pipeline_usage/tutorials/cv_pipelines/image_classification.md)中的 “模型应用 部分。 + 与服务化部署相关的命令行选项如下: @@ -91,82 +92,8 @@ INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) ### 1.3 调用服务 -请参考各产线使用教程中的 “开发集成/部署” 部分。 +请参考各产线使用教程中的 “开发集成/部署” 部分。在[此处](../pipeline_usage/pipeline_develop_guide.md)可以找到各产线的使用教程。 -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
模型产线使用教程
通用图像分类产线通用图像分类产线使用教程
通用目标检测产线通用目标检测产线使用教程
通用语义分割产线通用语义分割产线使用教程
通用实例分割产线通用实例分割产线使用教程
通用图像多标签分类产线通用图像多标签分类产线使用教程
小目标检测产线小目标检测产线使用教程
图像异常检测产线图像异常检测产线使用教程
通用OCR产线通用OCR产线使用教程
通用表格识别产线通用表格识别产线使用教程
通用版面解析产线通用版面解析产线使用教程
公式识别产线公式识别产线使用教程
印章文本识别产线印章文本识别产线使用教程
时序预测产线时序预测产线使用教程
时序异常检测产线时序异常检测产线使用教程
时序分类产线时序分类产线使用教程
文档场景信息抽取v3产线文档场景信息抽取v3产线使用教程
## 2、将服务用于生产 将服务应用于生产环境中时,服务的稳定性、高效性与安全性尤为重要。以下是一些针对将服务用于生产的建议。 diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md b/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md index bf99c9c4e9..b2c6b1bc9d 100644 --- a/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md +++ b/docs/pipeline_usage/tutorials/cv_pipelines/image_anomaly_detection.md @@ -348,7 +348,7 @@ for res in output:
import base64
 import requests
 
-API_URL = "http://localhost:8080/anomaly-detection" # 服务URL
+API_URL = "http://localhost:8080/image-anomaly-detection" # 服务URL
 image_path = "./demo.jpg"
 output_image_path = "./out.jpg"
 
@@ -405,7 +405,7 @@ int main() {
     std::string body = jsonObj.dump();
 
     // 调用API
-    auto response = client.Post("/anomaly-detection", headers, body, "application/json");
+    auto response = client.Post("/image-anomaly-detection", headers, body, "application/json");
     // 处理接口返回数据
     if (response && response->status == 200) {
         nlohmann::json jsonResponse = nlohmann::json::parse(response->body);
@@ -446,7 +446,7 @@ import java.util.Base64;
 
 public class Main {
     public static void main(String[] args) throws IOException {
-        String API_URL = "http://localhost:8080/anomaly-detection"; // 服务URL
+        String API_URL = "http://localhost:8080/image-anomaly-detection"; // 服务URL
         String imagePath = "./demo.jpg"; // 本地图像
         String outputImagePath = "./out.jpg"; // 输出图像
 
@@ -505,7 +505,7 @@ import (
 )
 
 func main() {
-    API_URL := "http://localhost:8080/anomaly-detection"
+    API_URL := "http://localhost:8080/image-anomaly-detection"
     imagePath := "./demo.jpg"
     outputImagePath := "./out.jpg"
 
@@ -585,7 +585,7 @@ using Newtonsoft.Json.Linq;
 
 class Program
 {
-    static readonly string API_URL = "http://localhost:8080/anomaly-detection";
+    static readonly string API_URL = "http://localhost:8080/image-anomaly-detection";
     static readonly string imagePath = "./demo.jpg";
     static readonly string outputImagePath = "./out.jpg";
 
@@ -623,7 +623,7 @@ class Program
 
const axios = require('axios');
 const fs = require('fs');
 
-const API_URL = 'http://localhost:8080/anomaly-detection'
+const API_URL = 'http://localhost:8080/image-anomaly-detection'
 const imagePath = './demo.jpg'
 const outputImagePath = "./out.jpg";
 
@@ -663,7 +663,7 @@ axios.request(config)
 
 
<?php
 
-$API_URL = "http://localhost:8080/anomaly-detection"; // 服务URL
+$API_URL = "http://localhost:8080/image-anomaly-detection"; // 服务URL
 $image_path = "./demo.jpg";
 $output_image_path = "./out.jpg";
 
diff --git a/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md b/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md
index 65f3ebca2f..3200d476fa 100644
--- a/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md
+++ b/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.en.md
@@ -670,7 +670,7 @@ Below are the API references and multi-language service invocation examples:
 
 

Operations provided by the service are as follows:

-• analyzeImage
+• analyzeImages

Analyze images using computer vision models to obtain OCR, table recognition results, and extract key information from the images.

POST /chatocr-vision

@@ -875,7 +875,7 @@ Below are the API references and multi-language service invocation examples: visionInfo object -Key information from the image. Provided by the analyzeImage operation. +Key information from the image. Provided by the analyzeImages operation. Yes @@ -904,11 +904,16 @@ Below are the API references and multi-language service invocation examples: -

Currently, llmParams can take the following form:

+

Currently, llmParams can take one of the following forms:

{
 "apiType": "qianfan",
-"apiKey": "{qianfan API key}",
-"secretKey": "{qianfan secret key}"
+"apiKey": "{Qianfan Platform API key}",
+"secretKey": "{Qianfan Platform secret key}"
+}
+
+
{
+"apiType": "aistudio",
+"accessToken": "{AI Studio access token}"
 }
 
    @@ -974,13 +979,18 @@ Below are the API references and multi-language service invocation examples: -

    Currently, llmParams can take the following form:

    +

    Currently, llmParams can take one of the following forms:

    {
     "apiType": "qianfan",
     "apiKey": "{Qianfan Platform API key}",
     "secretKey": "{Qianfan Platform secret key}"
     }
     
    +
    {
    +"apiType": "aistudio",
    +"accessToken": "{AI Studio access token}"
    +}
    +
    • When the request is processed successfully, the result in the response body has the following properties:
@@ -1027,43 +1037,37 @@ Below are the API references and multi-language service invocation examples: visionInfo object -Key information from images. Provided by the analyzeImage operation. +Key information from images. Provided by the analyzeImages operation. Yes -taskDescription +vectorStore string -Task prompt. +Serialized result of the vector database. Provided by the buildVectorStore operation. No -rules +retrievalResult string -Custom extraction rules, e.g., for output formatting. +Results of knowledge retrieval. Provided by the retrieveKnowledge operation. No -fewShot +taskDescription string -Example prompts. +Task prompt. No -vectorStore +rules string -Serialized result of the vector database. Provided by the buildVectorStore operation. +Custom extraction rules, e.g., for output formatting. No -retrievalResult +fewShot string -Results of knowledge retrieval. Provided by the retrieveKnowledge operation. -No - - -returnPrompts -boolean -Whether to return the prompts used. Enabled by default. +Example prompts. No @@ -1078,15 +1082,26 @@ Below are the API references and multi-language service invocation examples: API parameters for the large language model. No + +returnPrompts +boolean +Whether to return the prompts used. Disabled by default. +No + -

    Currently, llmParams can take the following form:

    +

    Currently, llmParams can take one of the following forms:

    {
     "apiType": "qianfan",
     "apiKey": "{Qianfan Platform API key}",
     "secretKey": "{Qianfan Platform secret key}"
     }
     
    +
    {
    +"apiType": "aistudio",
    +"accessToken": "{AI Studio access token}"
    +}
    +
    • On successful request processing, the result in the response body has the following properties:
    @@ -1231,14 +1246,14 @@ result_retrieval = resp_retrieval.json()["result"] payload = { "keys": keys, "visionInfo": result_vision["visionInfo"], + "vectorStore": result_vector["vectorStore"], + "retrievalResult": result_retrieval["retrievalResult"], "taskDescription": "", "rules": "", "fewShot": "", - "vectorStore": result_vector["vectorStore"], - "retrievalResult": result_retrieval["retrievalResult"], - "returnPrompts": True, "llmName": LLM_NAME, "llmParams": LLM_PARAMS, + "returnPrompts": True, } resp_chat = requests.post(url=f"{API_BASE_URL}/chatocr-chat", json=payload) if resp_chat.status_code != 200: diff --git a/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md b/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md index f5dd3ba034..21cfcd13b7 100644 --- a/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md +++ b/docs/pipeline_usage/tutorials/information_extraction_pipelines/document_scene_information_extraction.md @@ -639,7 +639,7 @@ chat_result.print()

    服务提供的操作如下:

-• analyzeImage
+• analyzeImages

    使用计算机视觉模型对图像进行分析,获得OCR、表格识别结果等,并提取图像中的关键信息。

    POST /chatocr-vision

    @@ -844,7 +844,7 @@ chat_result.print() visionInfo object -图像中的关键信息。由analyzeImage操作提供。 +图像中的关键信息。由analyzeImages操作提供。 是 @@ -873,13 +873,18 @@ chat_result.print() -

    当前,llmParams 可以采用如下形式:

    +

    当前,llmParams 可以采用如下形式之一:

    {
     "apiType": "qianfan",
     "apiKey": "{千帆平台API key}",
     "secretKey": "{千帆平台secret key}"
     }
     
    +
    {
    +"apiType": "aistudio",
    +"accessToken": "{星河社区access token}"
    +}
    +
    • 请求处理成功时,响应体的result具有如下属性:
    @@ -943,13 +948,18 @@ chat_result.print() -

    当前,llmParams 可以采用如下形式:

    +

    当前,llmParams 可以采用如下形式之一:

    {
     "apiType": "qianfan",
     "apiKey": "{千帆平台API key}",
     "secretKey": "{千帆平台secret key}"
     }
     
    +
    {
    +"apiType": "aistudio",
    +"accessToken": "{星河社区access token}"
    +}
    +
    • 请求处理成功时,响应体的result具有如下属性:
    @@ -996,43 +1006,37 @@ chat_result.print() visionInfo object -图像中的关键信息。由analyzeImage操作提供。 +图像中的关键信息。由analyzeImages操作提供。 是 -taskDescription +vectorStore string -提示词任务。 +向量数据库序列化结果。由buildVectorStore操作提供。 否 -rules +retrievalResult string -提示词规则。用于自定义信息抽取规则,例如规范输出格式。 +知识检索结果。由retrieveKnowledge操作提供。 否 -fewShot +taskDescription string -提示词示例。 +提示词任务。 否 -vectorStore +rules string -向量数据库序列化结果。由buildVectorStore操作提供。 +提示词规则。用于自定义信息抽取规则,例如规范输出格式。 否 -retrievalResult +fewShot string -知识检索结果。由retrieveKnowledge操作提供。 -否 - - -returnPrompts -boolean -是否返回使用的提示词。默认启用。 +提示词示例。 否 @@ -1047,15 +1051,26 @@ chat_result.print() 大语言模型API参数。 否 + +returnPrompts +boolean +是否返回使用的提示词。默认禁用。 +否 + -

    当前,llmParams 可以采用如下形式:

    +

    当前,llmParams 可以采用如下形式之一:

    {
     "apiType": "qianfan",
     "apiKey": "{千帆平台API key}",
     "secretKey": "{千帆平台secret key}"
     }
     
    +
    {
    +"apiType": "aistudio",
    +"accessToken": "{星河社区access token}"
    +}
    +
    • 请求处理成功时,响应体的result具有如下属性:
    @@ -1201,14 +1216,14 @@ result_retrieval = resp_retrieval.json()["result"] payload = { "keys": keys, "visionInfo": result_vision["visionInfo"], + "vectorStore": result_vector["vectorStore"], + "retrievalResult": result_retrieval["retrievalResult"], "taskDescription": "", "rules": "", "fewShot": "", - "vectorStore": result_vector["vectorStore"], - "retrievalResult": result_retrieval["retrievalResult"], - "returnPrompts": True, "llmName": LLM_NAME, "llmParams": LLM_PARAMS, + "returnPrompts": True, } resp_chat = requests.post(url=f"{API_BASE_URL}/chatocr-chat", json=payload) if resp_chat.status_code != 200: diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md b/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md index c231307d2f..8784717f08 100644 --- a/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md +++ b/docs/pipeline_usage/tutorials/ocr_pipelines/layout_parsing.md @@ -744,6 +744,8 @@ import requests API_URL = "http://localhost:8080/layout-parsing" # 服务URL +image_path = "./demo.jpg" + # 对本地图像进行Base64编码 with open(image_path, "rb") as file: image_bytes = file.read() diff --git a/paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py b/paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py index 970570c68e..c84cea8888 100644 --- a/paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py +++ b/paddlex/inference/pipelines/serving/_pipeline_apps/ppchatocrv3.py @@ -45,7 +45,7 @@ class InferenceParams(BaseModel): maxLongSide: Optional[Annotated[int, Field(gt=0)]] = None -class AnalyzeImageRequest(BaseModel): +class AnalyzeImagesRequest(BaseModel): file: str fileType: Optional[FileType] = None useImgOrientationCls: bool = True @@ -78,22 +78,22 @@ class VisionResult(BaseModel): layoutImage: str -class AnalyzeImageResult(BaseModel): +class AnalyzeImagesResult(BaseModel): visionResults: List[VisionResult] visionInfo: dict -class AIStudioParams(BaseModel): - accessToken: str - apiType: Literal["aistudio"] = "aistudio" - - class QianfanParams(BaseModel): apiKey: str secretKey: str apiType: Literal["qianfan"] = "qianfan" +class AIStudioParams(BaseModel): + accessToken: str + apiType: Literal["aistudio"] = "aistudio" + + LLMName: TypeAlias = Literal[ "ernie-3.5", "ernie-3.5-8k", @@ -105,7 +105,7 @@ class QianfanParams(BaseModel): "ernie-tiny-8k", "ernie-char-8k", ] -LLMParams: TypeAlias = Union[AIStudioParams, QianfanParams] +LLMParams: TypeAlias = Union[QianfanParams, AIStudioParams] class BuildVectorStoreRequest(BaseModel): @@ -134,14 +134,14 @@ class RetrieveKnowledgeResult(BaseModel): class ChatRequest(BaseModel): keys: List[str] visionInfo: dict + vectorStore: Optional[str] = None + retrievalResult: Optional[str] = None taskDescription: Optional[str] = None rules: Optional[str] = None fewShot: Optional[str] = None - vectorStore: Optional[str] = None - retrievalResult: Optional[str] = None - returnPrompts: bool = True llmName: Optional[LLMName] = None llmParams: Optional[Annotated[LLMParams, Field(discriminator="apiType")]] = None + returnPrompts: bool = False class Prompts(BaseModel): @@ -196,14 +196,14 @@ def _infer_file_type(url: str) -> FileType: def _llm_params_to_dict(llm_params: LLMParams) -> dict: - if llm_params.apiType == "aistudio": - return {"api_type": "aistudio", "access_token": llm_params.accessToken} - elif llm_params.apiType == "qianfan": + if llm_params.apiType == "qianfan": return { "api_type": "qianfan", "ak": llm_params.apiKey, "sk": llm_params.secretKey, } + if llm_params.apiType == "aistudio": + 
return {"api_type": "aistudio", "access_token": llm_params.accessToken} else: assert_never(llm_params.apiType) @@ -265,12 +265,12 @@ def create_pipeline_app(pipeline: PPChatOCRPipeline, app_config: AppConfig) -> F @app.post( "/chatocr-vision", - operation_id="analyzeImage", + operation_id="analyzeImages", responses={422: {"model": Response}}, ) - async def _analyze_image( - request: AnalyzeImageRequest, - ) -> ResultResponse[AnalyzeImageResult]: + async def _analyze_images( + request: AnalyzeImagesRequest, + ) -> ResultResponse[AnalyzeImagesResult]: pipeline = ctx.pipeline aiohttp_session = ctx.aiohttp_session @@ -371,7 +371,7 @@ async def _analyze_image( logId=serving_utils.generate_log_id(), errorCode=0, errorMsg="Success", - result=AnalyzeImageResult( + result=AnalyzeImagesResult( visionResults=vision_results, visionInfo=result[1], ), @@ -395,8 +395,6 @@ async def _build_vector_store( kwargs = {"visual_info": results.VisualInfoResult(request.visionInfo)} if request.minChars is not None: kwargs["min_characters"] = request.minChars - else: - kwargs["min_characters"] = 0 if request.llmRequestInterval is not None: kwargs["llm_request_interval"] = request.llmRequestInterval if request.llmName is not None: @@ -470,23 +468,23 @@ async def _chat( "key_list": request.keys, "visual_info": results.VisualInfoResult(request.visionInfo), } - if request.taskDescription is not None: - kwargs["user_task_description"] = request.taskDescription - if request.rules is not None: - kwargs["rules"] = request.rules - if request.fewShot is not None: - kwargs["few_shot"] = request.fewShot if request.vectorStore is not None: kwargs["vector"] = results.VectorResult({"vector": request.vectorStore}) if request.retrievalResult is not None: kwargs["retrieval_result"] = results.RetrievalResult( {"retrieval": request.retrievalResult} ) - kwargs["save_prompt"] = request.returnPrompts + if request.taskDescription is not None: + kwargs["user_task_description"] = request.taskDescription + if request.rules is not None: + kwargs["rules"] = request.rules + if request.fewShot is not None: + kwargs["few_shot"] = request.fewShot if request.llmName is not None: kwargs["llm_name"] = request.llmName if request.llmParams is not None: kwargs["llm_params"] = _llm_params_to_dict(request.llmParams) + kwargs["save_prompt"] = request.returnPrompts result = await serving_utils.call_async(pipeline.pipeline.chat, **kwargs)