From 8fd61e3634c3255e790e1a7c444e35576c5937e9 Mon Sep 17 00:00:00 2001
From: Jack Zhou
Date: Tue, 8 Nov 2022 10:54:59 +0800
Subject: [PATCH] [Model] Add text classification task for ernie-3.0 (#430)

* move text_cls to ernie-3.0
* Add main page of ernie-3.0
* rename infer -> seq_cls_infer
* Fix the links
* Add ernie-3.0 python, cpp readme
* Fix some cpp readme
* Add fastdeploy::FDERROR
* Add python readme for ernie-3.0
* update README.md
* Add empty line
* update readme
* Fix readme
* remove the - from ernie 3.0
* ernie-3.0 -> ernie 3.0
* Use AutoTokenizer to tokenize
* Ernie -> ERNIE
---
 examples/text/ernie-3.0/README.md             |  39 +++
 examples/text/ernie-3.0/cpp/CMakeLists.txt    |  26 ++
 examples/text/ernie-3.0/cpp/README.md         |  70 +++++
 examples/text/ernie-3.0/cpp/gflags.cmake      |  76 +++++
 examples/text/ernie-3.0/cpp/seq_cls_infer.cc  | 269 ++++++++++++++++++
 examples/text/ernie-3.0/python/README.md      |  71 +++++
 .../text/ernie-3.0/python/requirements.txt    |   2 +
 .../text/ernie-3.0/python/seq_cls_infer.py    | 182 ++++++++++++
 examples/text/ernie-3.0/serving/README.md     |   2 +-
 .../models/ernie_seqcls_model/1/README.md     |   2 +-
 .../models/ernie_tokencls_model/1/README.md   |   2 +-
 11 files changed, 738 insertions(+), 3 deletions(-)
 create mode 100644 examples/text/ernie-3.0/README.md
 create mode 100644 examples/text/ernie-3.0/cpp/CMakeLists.txt
 create mode 100644 examples/text/ernie-3.0/cpp/README.md
 create mode 100644 examples/text/ernie-3.0/cpp/gflags.cmake
 create mode 100644 examples/text/ernie-3.0/cpp/seq_cls_infer.cc
 create mode 100644 examples/text/ernie-3.0/python/README.md
 create mode 100644 examples/text/ernie-3.0/python/requirements.txt
 create mode 100644 examples/text/ernie-3.0/python/seq_cls_infer.py

diff --git a/examples/text/ernie-3.0/README.md b/examples/text/ernie-3.0/README.md
new file mode 100644
index 0000000000..36d76ee817
--- /dev/null
+++ b/examples/text/ernie-3.0/README.md
@@ -0,0 +1,39 @@
+# ERNIE 3.0 Model Deployment
+
+## Detailed Model Description
+- [PaddleNLP ERNIE 3.0 model description](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+## Supported Models
+
+| Model | Structure | Language |
+| :---: | :--------: | :--------: |
+| `ERNIE 3.0-Base` | 12-layers, 768-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Medium` | 6-layers, 768-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Mini` | 6-layers, 384-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Micro` | 4-layers, 384-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Nano` | 4-layers, 312-hidden, 12-heads | Chinese |
+
+## Supported NLP Tasks
+
+| Task | Supported |
+| :--------------- | ------- |
+| Text classification | ✅ |
+| Sequence labeling | ❌ |
+
+## Export the Deployment Model
+
+Before deployment, the trained ERNIE model has to be exported as an inference (deployment) model; see [Export the model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0) for the export steps.
+
+## Download Fine-tuned Models
+
+### Classification task
+
+For convenient testing, an ERNIE 3.0-Medium model fine-tuned on the text classification [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) is provided below and can be downloaded directly.
+
+- [ERNIE 3.0 Medium AFQMC](https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz)
+
+## Detailed Deployment Documents
+
+- [Python deployment](python)
+- [C++ deployment](cpp)
+- [Serving deployment](serving)
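+
+For reference, the fine-tuned AFQMC model listed above can also be fetched from a script. The following is a minimal, illustrative Python sketch (it simply mirrors the `wget`/`tar` steps used in the per-language READMEs; the URL is the one given in the download section):
+
+```python
+# Illustrative helper: download and unpack the fine-tuned AFQMC model.
+import tarfile
+import urllib.request
+
+URL = "https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz"
+
+archive, _ = urllib.request.urlretrieve(URL, "ernie-3.0-medium-zh-afqmc.tgz")
+with tarfile.open(archive) as tar:
+    tar.extractall(".")  # creates the ernie-3.0-medium-zh-afqmc/ directory
+```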
diff --git a/examples/text/ernie-3.0/cpp/CMakeLists.txt b/examples/text/ernie-3.0/cpp/CMakeLists.txt
new file mode 100644
index 0000000000..fe15b14ff8
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+set(THIRD_LIBS "")
+include(gflags.cmake)
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(seq_cls_infer_demo ${PROJECT_SOURCE_DIR}/seq_cls_infer.cc)
+target_link_libraries(seq_cls_infer_demo ${FASTDEPLOY_LIBS} ${THIRD_LIBS})
diff --git a/examples/text/ernie-3.0/cpp/README.md b/examples/text/ernie-3.0/cpp/README.md
new file mode 100644
index 0000000000..5c2c854edf
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/README.md
@@ -0,0 +1,70 @@
+# ERNIE 3.0 Model C++ Deployment Example
+
+Before deployment, confirm the following two steps:
+
+- 1. The software and hardware environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md).
+- 2. Download the prebuilt deployment library and the samples code for your development environment; see [FastDeploy prebuilt libraries](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md).
+
+This directory provides `seq_cls_infer.cc`, a C++ deployment example that quickly runs the text classification task on CPU/GPU.
+
+
+## Text Classification Task
+
+### Quick start
+
+The following example shows how to use the FastDeploy library to deploy an ERNIE 3.0 Medium model for text classification inference in C++ on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) from the CLUE Benchmark.
+
+```bash
+# Download the SDK and build the examples code (the examples are shipped inside the SDK)
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.4.0.tgz
+tar xvf fastdeploy-linux-x64-gpu-0.4.0.tgz
+
+cd fastdeploy-linux-x64-gpu-0.4.0/examples/text/ernie-3.0/cpp
+mkdir build
+cd build
+# Run cmake; FASTDEPLOY_INSTALL_DIR must be set to the FastDeploy SDK directory.
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../../../fastdeploy-linux-x64-gpu-0.4.0
+make -j
+
+# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset, together with the vocab file
+wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
+tar xvfz ernie-3.0-medium-zh-afqmc.tgz
+
+# CPU inference
+./seq_cls_infer_demo --device cpu --model_dir ernie-3.0-medium-zh-afqmc
+
+# GPU inference
+./seq_cls_infer_demo --device gpu --model_dir ernie-3.0-medium-zh-afqmc
+
+```
+
+The results returned after the run are as follows:
+```bash
+[INFO] /paddle/FastDeploy/examples/text/ernie-3.0/cpp/seq_cls_infer.cc(93)::CreateRuntimeOption model_path = ernie-3.0-medium-zh-afqmc/infer.pdmodel, param_path = ernie-3.0-medium-zh-afqmc/infer.pdiparams
+[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
+Batch id: 0, example id: 0, sentence 1: 花呗收款额度限制, sentence 2: 收钱码,对花呗支付的金额有限制吗, label: 1, confidence: 0.581852
+Batch id: 1, example id: 0, sentence 1: 花呗支持高铁票支付吗, sentence 2: 为什么友付宝不支持花呗付款, label: 0, confidence: 0.997921
+```
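+
+The `label` and `confidence` fields above come from the model logits: the confidence is the maximum of the row-wise softmax and the label is its argmax, which is exactly what the demo's `Postprocess` does with `fastdeploy::Softmax`, `Max` and `ArgMax`. A minimal, illustrative NumPy sketch of that post-processing (not part of the demo itself):
+
+```python
+import numpy as np
+
+def logits_to_label_confidence(logits: np.ndarray):
+    """Row-wise softmax, then take the argmax and its probability."""
+    shifted = logits - logits.max(axis=-1, keepdims=True)  # numerical stability
+    probs = np.exp(shifted) / np.exp(shifted).sum(axis=-1, keepdims=True)
+    return probs.argmax(axis=-1), probs.max(axis=-1)
+
+labels, confidences = logits_to_label_confidence(np.array([[0.2, 0.5]]))
+```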
+
+
+### Parameter description
+
+Besides the command line arguments shown above, `seq_cls_infer_demo` supports more options. They are described below.
+
+| Argument | Description |
+|----------|--------------|
+|--model_dir | Directory of the deployment model |
+|--batch_size | Maximum batch size for inference; defaults to 1 |
+|--max_length | Maximum sequence length; defaults to 128 |
+|--device | Device to run on; one of ['cpu', 'gpu']; defaults to 'cpu' |
+|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']; defaults to 'onnx_runtime' |
+|--use_fp16 | Whether to run inference in FP16 mode; can be enabled with the tensorrt and paddle_tensorrt backends; defaults to False |
+
+## Related Documents
+
+[Detailed introduction to the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[How to export the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[ERNIE 3.0 model Python deployment](../python/README.md)
diff --git a/examples/text/ernie-3.0/cpp/gflags.cmake b/examples/text/ernie-3.0/cpp/gflags.cmake
new file mode 100644
index 0000000000..9fede6c5fe
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/gflags.cmake
@@ -0,0 +1,76 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+INCLUDE(ExternalProject)
+SET(GIT_URL "https://github.com")
+SET(GFLAGS_PREFIX_DIR ${CMAKE_CURRENT_BINARY_DIR}/gflags)
+SET(GFLAGS_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/gflags)
+SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." 
FORCE) +set(GFLAGS_REPOSITORY ${GIT_URL}/gflags/gflags.git) +set(GFLAGS_TAG "v2.2.2") +IF(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) +ELSE(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) + set(BUILD_COMMAND $(MAKE) --silent) + set(INSTALL_COMMAND $(MAKE) install) +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) + +ExternalProject_Add( + extern_gflags + ${EXTERNAL_PROJECT_LOG_ARGS} + ${SHALLOW_CLONE} + GIT_REPOSITORY ${GFLAGS_REPOSITORY} + GIT_TAG ${GFLAGS_TAG} + PREFIX ${GFLAGS_PREFIX_DIR} + UPDATE_COMMAND "" + BUILD_COMMAND ${BUILD_COMMAND} + INSTALL_COMMAND ${INSTALL_COMMAND} + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DBUILD_STATIC_LIBS=ON + -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES} +) + +ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) +ADD_DEPENDENCIES(gflags extern_gflags) +LIST(APPEND THIRD_LIBS gflags) +if (UNIX) + LIST(APPEND THIRD_LIBS pthread) +endif() +# On Windows (including MinGW), the Shlwapi library is used by gflags if available. +if (WIN32) + include(CheckIncludeFileCXX) + check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI) + if (HAVE_SHLWAPI) + set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib) + endif(HAVE_SHLWAPI) +endif (WIN32) diff --git a/examples/text/ernie-3.0/cpp/seq_cls_infer.cc b/examples/text/ernie-3.0/cpp/seq_cls_infer.cc new file mode 100644 index 0000000000..01ef403a0b --- /dev/null +++ b/examples/text/ernie-3.0/cpp/seq_cls_infer.cc @@ -0,0 +1,269 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/function/reduce.h"
+#include "fastdeploy/function/softmax.h"
+#include "fastdeploy/runtime.h"
+#include "fastdeploy/utils/path.h"
+#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
+#include "gflags/gflags.h"
+
+using namespace paddlenlp;
+using namespace faster_tokenizer::tokenizers_impl;
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+DEFINE_string(model_dir, "", "Directory of the inference model.");
+DEFINE_string(vocab_path, "", "Path of the vocab file.");
+DEFINE_string(device, "cpu",
+              "Type of inference device, support 'cpu' or 'gpu'.");
+DEFINE_string(backend, "onnx_runtime",
+              "The inference runtime backend, support: ['onnx_runtime', "
+              "'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']");
+DEFINE_int32(batch_size, 1, "The batch size of data.");
+DEFINE_int32(max_length, 128, "The max length of sequence.");
+DEFINE_bool(use_fp16, false, "Whether to use FP16 mode.");
+
+void PrintUsage() {
+  fastdeploy::FDINFO
+      << "Usage: seq_cls_infer_demo --model_dir dir --device [cpu|gpu] "
+         "--backend "
+         "[onnx_runtime|paddle|openvino|tensorrt|paddle_tensorrt] "
+         "--batch_size size --max_length len --use_fp16 false"
+      << std::endl;
+  fastdeploy::FDINFO << "Default value of device: cpu" << std::endl;
+  fastdeploy::FDINFO << "Default value of backend: onnx_runtime" << std::endl;
+  fastdeploy::FDINFO << "Default value of batch_size: 1" << std::endl;
+  fastdeploy::FDINFO << "Default value of max_length: 128" << std::endl;
+  fastdeploy::FDINFO << "Default value of use_fp16: false" << std::endl;
+}
+
+bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
+  if (FLAGS_device == "gpu") {
+    option->UseGpu();
+  } else if (FLAGS_device == "cpu") {
+    option->UseCpu();
+  } else {
+    fastdeploy::FDERROR << "The available device should be one of the list "
+                           "['cpu', 'gpu']. But receive '"
+                        << FLAGS_device << "'" << std::endl;
+    return false;
+  }
+
+  if (FLAGS_backend == "onnx_runtime") {
+    option->UseOrtBackend();
+  } else if (FLAGS_backend == "paddle") {
+    option->UsePaddleBackend();
+  } else if (FLAGS_backend == "openvino") {
+    option->UseOpenVINOBackend();
+  } else if (FLAGS_backend == "tensorrt" ||
+             FLAGS_backend == "paddle_tensorrt") {
+    option->UseTrtBackend();
+    if (FLAGS_backend == "paddle_tensorrt") {
+      option->EnablePaddleToTrt();
+      option->EnablePaddleTrtCollectShape();
+    }
+    std::string trt_file = FLAGS_model_dir + sep + "infer.trt";
+    option->SetTrtInputShape("input_ids", {1, FLAGS_max_length},
+                             {FLAGS_batch_size, FLAGS_max_length},
+                             {FLAGS_batch_size, FLAGS_max_length});
+    option->SetTrtInputShape("token_type_ids", {1, FLAGS_max_length},
+                             {FLAGS_batch_size, FLAGS_max_length},
+                             {FLAGS_batch_size, FLAGS_max_length});
+    if (FLAGS_use_fp16) {
+      option->EnableTrtFP16();
+      trt_file = trt_file + ".fp16";
+    }
+  } else {
+    fastdeploy::FDERROR << "The available backend should be one of the list "
+                           "['onnx_runtime', 'paddle', 'openvino', "
+                           "'tensorrt', 'paddle_tensorrt']. But receive '"
+                        << FLAGS_backend << "'" << std::endl;
+    return false;
+  }
+  std::string model_path = FLAGS_model_dir + sep + "infer.pdmodel";
+  std::string param_path = FLAGS_model_dir + sep + "infer.pdiparams";
+  fastdeploy::FDINFO << "model_path = " << model_path
+                     << ", param_path = " << param_path << std::endl;
+  option->SetModelPath(model_path, param_path);
+  return true;
+}
+
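+// BatchFyTexts splits the flat list of input texts into consecutive batches of
+// at most `batch_size` sentences; the last batch may be smaller. Each batch is
+// later tokenized and sent to the runtime in a single call.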
+bool BatchFyTexts(const std::vector<std::string>& texts, int batch_size,
+                  std::vector<std::vector<std::string>>* batch_texts) {
+  for (int idx = 0; idx < texts.size(); idx += batch_size) {
+    int rest = texts.size() - idx;
+    int curr_size = std::min(batch_size, rest);
+    std::vector<std::string> batch_text(curr_size);
+    std::copy_n(texts.begin() + idx, curr_size, batch_text.begin());
+    batch_texts->emplace_back(std::move(batch_text));
+  }
+  return true;
+}
+
+struct SeqClsResult {
+  int label;
+  float confidence;
+};
+
+struct ErnieForSequenceClassificationPredictor {
+  fastdeploy::Runtime runtime_;
+  ErnieFasterTokenizer tokenizer_;
+  ErnieForSequenceClassificationPredictor(
+      const fastdeploy::RuntimeOption& option,
+      const ErnieFasterTokenizer& tokenizer)
+      : tokenizer_(tokenizer) {
+    runtime_.Init(option);
+  }
+
+  bool Preprocess(const std::vector<std::string>& texts,
+                  const std::vector<std::string>& texts_pair,
+                  std::vector<fastdeploy::FDTensor>* inputs) {
+    std::vector<faster_tokenizer::core::Encoding> encodings;
+    std::vector<faster_tokenizer::core::EncodeInput> text_pair_input;
+    // 1. Tokenize the text or (text, text_pair)
+    if (texts_pair.empty()) {
+      for (int i = 0; i < texts.size(); ++i) {
+        text_pair_input.emplace_back(texts[i]);
+      }
+    } else {
+      if (texts.size() != texts_pair.size()) {
+        return false;
+      }
+      for (int i = 0; i < texts.size(); ++i) {
+        text_pair_input.emplace_back(
+            std::pair<std::string, std::string>(texts[i], texts_pair[i]));
+      }
+    }
+    tokenizer_.EncodeBatchStrings(text_pair_input, &encodings);
+    // 2. Construct the input vector tensor
+    // 2.1 Allocate input tensor
+    int64_t batch_size = texts.size();
+    int64_t seq_len = 0;
+    if (batch_size > 0) {
+      seq_len = encodings[0].GetIds().size();
+    }
+    inputs->resize(runtime_.NumInputs());
+    for (int i = 0; i < runtime_.NumInputs(); ++i) {
+      (*inputs)[i].Allocate({batch_size, seq_len},
+                            fastdeploy::FDDataType::INT64,
+                            runtime_.GetInputInfo(i).name);
+    }
+    // 2.2 Set the value of data
+    size_t start = 0;
+    int64_t* input_ids_ptr =
+        reinterpret_cast<int64_t*>((*inputs)[0].MutableData());
+    int64_t* type_ids_ptr =
+        reinterpret_cast<int64_t*>((*inputs)[1].MutableData());
+    for (int i = 0; i < encodings.size(); ++i) {
+      auto&& curr_input_ids = encodings[i].GetIds();
+      auto&& curr_type_ids = encodings[i].GetTypeIds();
+      std::copy(curr_input_ids.begin(), curr_input_ids.end(),
+                input_ids_ptr + start);
+      std::copy(curr_type_ids.begin(), curr_type_ids.end(),
+                type_ids_ptr + start);
+      start += seq_len;
+    }
+    return true;
+  }
+
+  bool Postprocess(const std::vector<fastdeploy::FDTensor>& outputs,
+                   std::vector<SeqClsResult>* seq_cls_results) {
+    const auto& logits = outputs[0];
+    fastdeploy::FDTensor probs;
+    fastdeploy::Softmax(logits, &probs);
+
+    fastdeploy::FDTensor labels, confidences;
+    fastdeploy::Max(probs, &confidences, {-1});
+    fastdeploy::ArgMax(probs, &labels, -1);
+    if (labels.Numel() != confidences.Numel()) {
+      return false;
+    }
+
+    seq_cls_results->resize(labels.Numel());
+    int64_t* label_ptr = reinterpret_cast<int64_t*>(labels.Data());
+    float* confidence_ptr = reinterpret_cast<float*>(confidences.Data());
+    for (int i = 0; i < labels.Numel(); ++i) {
+      (*seq_cls_results)[i].label = label_ptr[i];
+      (*seq_cls_results)[i].confidence = confidence_ptr[i];
+    }
+    return true;
+  }
+
+  bool Predict(const std::vector<std::string>& texts,
+               const std::vector<std::string>& texts_pair,
+               std::vector<SeqClsResult>* seq_cls_results) {
+    std::vector<fastdeploy::FDTensor> inputs;
+    if (!Preprocess(texts, texts_pair, &inputs)) {
+      return false;
+    }
+
+    std::vector<fastdeploy::FDTensor> outputs(runtime_.NumOutputs());
+    runtime_.Infer(inputs, &outputs);
+
+    if (!Postprocess(outputs, seq_cls_results)) {
+      return false;
+    }
+    return true;
+  }
+};
+
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto option = fastdeploy::RuntimeOption();
+  if (!CreateRuntimeOption(&option)) {
+    PrintUsage();
+    return -1;
+  }
+
+  std::string vocab_path = FLAGS_vocab_path;
+  if (!fastdeploy::CheckFileExists(vocab_path)) {
+    vocab_path = fastdeploy::PathJoin(FLAGS_model_dir, "vocab.txt");
+    if (!fastdeploy::CheckFileExists(vocab_path)) {
+      fastdeploy::FDERROR << "The path of vocab " << vocab_path
+                          << " doesn't exist" << std::endl;
+      PrintUsage();
+      return -1;
+    }
+  }
+  ErnieFasterTokenizer tokenizer(vocab_path);
+
+  ErnieForSequenceClassificationPredictor predictor(option, tokenizer);
+
+  std::vector<SeqClsResult> seq_cls_results;
+  std::vector<std::string> texts_ds = {"花呗收款额度限制",
+                                       "花呗支持高铁票支付吗"};
+  std::vector<std::string> texts_pair_ds = {"收钱码,对花呗支付的金额有限制吗",
+                                            "为什么友付宝不支持花呗付款"};
+  std::vector<std::vector<std::string>> batch_texts, batch_texts_pair;
+  BatchFyTexts(texts_ds, FLAGS_batch_size, &batch_texts);
+  BatchFyTexts(texts_pair_ds, FLAGS_batch_size, &batch_texts_pair);
+  for (int bs = 0; bs < batch_texts.size(); ++bs) {
+    predictor.Predict(batch_texts[bs], batch_texts_pair[bs], &seq_cls_results);
+    for (int i = 0; i < batch_texts[bs].size(); ++i) {
+      std::cout << "Batch id: " << bs << ", example id: " << i
+                << ", sentence 1: " << batch_texts[bs][i]
+                << ", sentence 2: " << batch_texts_pair[bs][i]
+                << ", label: " << seq_cls_results[i].label
+                << ", confidence: " << seq_cls_results[i].confidence
+                << std::endl;
+    }
+  }
+  return 0;
+}
diff --git a/examples/text/ernie-3.0/python/README.md b/examples/text/ernie-3.0/python/README.md
new file mode 100644
index 0000000000..12487aa7ad
--- /dev/null
+++ b/examples/text/ernie-3.0/python/README.md
@@ -0,0 +1,71 @@
+# ERNIE 3.0 Model Python Deployment Example
+
+Before deployment, confirm the following two steps:
+
+- 1. The software and hardware environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md).
+- 2. Install the FastDeploy Python wheel; see [FastDeploy Python installation](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md).
+
+This directory provides `seq_cls_infer.py`, a deployment example that quickly runs the text classification task on CPU/GPU.
+
+## Install Dependencies
+
+The Python Predictor provided by this project tokenizes input with the AutoTokenizer from PaddleNLP and uses fast_tokenizer to speed up tokenization. Run the following command to install the dependencies.
+
+```bash
+pip install -r requirements.txt
+```
+
+
+## Text Classification Task
+
+### Quick start
+
+The following example shows how to use the FastDeploy library to deploy an ERNIE 3.0 Medium model for text classification inference in Python on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) from the CLUE Benchmark.
+
+```bash
+
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/text/ernie-3.0/python
+
+# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset
+wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
+tar xvfz ernie-3.0-medium-zh-afqmc.tgz
+
+# CPU inference
+python seq_cls_infer.py --device cpu --model_dir ernie-3.0-medium-zh-afqmc
+
+# GPU inference
+python seq_cls_infer.py --device gpu --model_dir ernie-3.0-medium-zh-afqmc
+
+```
+
+The results returned after the run are as follows:
+
+```bash
+[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
+Batch id:0, example id:0, sentence1:花呗收款额度限制, sentence2:收钱码,对花呗支付的金额有限制吗, label:1, similarity:0.5819
+Batch id:1, example id:0, sentence1:花呗支持高铁票支付吗, sentence2:为什么友付宝不支持花呗付款, label:0, similarity:0.9979
+```
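+
+The predictor class in `seq_cls_infer.py` can also be driven from your own script instead of the command line. A minimal, illustrative sketch (it assumes the fine-tuned model has been extracted to `ernie-3.0-medium-zh-afqmc` as above, and it reuses the example's own `parse_arguments` by setting `sys.argv`; only `--model_dir` is required):
+
+```python
+import sys
+
+from seq_cls_infer import ErnieForSequenceClassificationPredictor, parse_arguments
+
+# Reuse the example's argument parser with default settings.
+sys.argv = ["seq_cls_infer.py", "--model_dir", "ernie-3.0-medium-zh-afqmc"]
+args = parse_arguments()
+
+predictor = ErnieForSequenceClassificationPredictor(args)
+outputs = predictor.predict(["花呗收款额度限制"], ["收钱码,对花呗支付的金额有限制吗"])
+print(outputs["label"], outputs["confidence"])
+```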
+
+### Parameter description
+
+Besides the command line arguments shown above, `seq_cls_infer.py` supports more options. They are described below.
+
+| Argument | Description |
+|----------|--------------|
+|--model_dir | Directory of the deployment model |
+|--batch_size | Maximum batch size for inference; defaults to 1 |
+|--max_length | Maximum sequence length; defaults to 128 |
+|--device | Device to run on; one of ['cpu', 'gpu']; defaults to 'cpu' |
+|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']; defaults to 'onnx_runtime' |
+|--use_fp16 | Whether to run inference in FP16 mode; can be enabled with the tensorrt and paddle_tensorrt backends; defaults to False |
+|--use_fast | Whether to use FastTokenizer to speed up the tokenization stage; defaults to False |
+
+## Related Documents
+
+[Detailed introduction to the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[How to export the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[ERNIE 3.0 model C++ deployment](../cpp/README.md)
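+
+The backend-related flags above map onto `fastdeploy.RuntimeOption` calls. The sketch below illustrates roughly what `--device gpu --backend paddle_tensorrt --use_fp16 True` configures; it only uses calls that `seq_cls_infer.py` itself makes (shapes and the model directory name are example values):
+
+```python
+import os
+import fastdeploy as fd
+
+model_dir = "ernie-3.0-medium-zh-afqmc"
+option = fd.RuntimeOption()
+option.set_model_path(
+    os.path.join(model_dir, "infer.pdmodel"),
+    os.path.join(model_dir, "infer.pdiparams"))
+option.use_gpu()
+option.use_trt_backend()
+option.enable_paddle_to_trt()            # run TensorRT through the Paddle backend
+option.enable_paddle_trt_collect_shape()
+option.set_trt_input_shape("input_ids", min_shape=[1, 128],
+                           opt_shape=[1, 128], max_shape=[1, 128])
+option.set_trt_input_shape("token_type_ids", min_shape=[1, 128],
+                           opt_shape=[1, 128], max_shape=[1, 128])
+option.enable_trt_fp16()
+runtime = fd.Runtime(option)
+```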
diff --git a/examples/text/ernie-3.0/python/requirements.txt b/examples/text/ernie-3.0/python/requirements.txt
new file mode 100644
index 0000000000..204cf718cd
--- /dev/null
+++ b/examples/text/ernie-3.0/python/requirements.txt
@@ -0,0 +1,2 @@
+faster_tokenizer
+paddlenlp
diff --git a/examples/text/ernie-3.0/python/seq_cls_infer.py b/examples/text/ernie-3.0/python/seq_cls_infer.py
new file mode 100644
index 0000000000..de67884a1d
--- /dev/null
+++ b/examples/text/ernie-3.0/python/seq_cls_infer.py
@@ -0,0 +1,182 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import distutils.util
+
+import numpy as np
+import faster_tokenizer
+from paddlenlp.transformers import AutoTokenizer
+import fastdeploy as fd
+
+
+def parse_arguments():
+    import argparse
+    import ast
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_dir", required=True, help="The directory of model.")
+    parser.add_argument(
+        "--vocab_path",
+        type=str,
+        default="",
+        help="The path of tokenizer vocab.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        choices=['gpu', 'cpu'],
+        help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--backend",
+        type=str,
+        default='onnx_runtime',
+        choices=[
+            'onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'
+        ],
+        help="The inference runtime backend.")
+    parser.add_argument(
+        "--batch_size", type=int, default=1, help="The batch size of data.")
+    parser.add_argument(
+        "--max_length",
+        type=int,
+        default=128,
+        help="The max length of sequence.")
+    parser.add_argument(
+        "--log_interval",
+        type=int,
+        default=10,
+        help="The interval of logging.")
+    parser.add_argument(
+        "--use_fp16",
+        type=distutils.util.strtobool,
+        default=False,
+        help="Whether to use FP16 mode")
+    parser.add_argument(
+        "--use_fast",
+        type=distutils.util.strtobool,
+        default=False,
+        help="Whether to use fast_tokenizer to accelerate the tokenization.")
+    return parser.parse_args()
+
+
+def batchfy_text(texts, batch_size):
+    batch_texts = []
+    batch_start = 0
+    while batch_start < len(texts):
+        batch_texts += [
+            texts[batch_start:min(batch_start + batch_size, len(texts))]
+        ]
+        batch_start += batch_size
+    return batch_texts
+
+
+class ErnieForSequenceClassificationPredictor(object):
+    def __init__(self, args):
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            'ernie-3.0-medium-zh', use_faster=args.use_fast)
+        self.runtime = self.create_fd_runtime(args)
+        self.batch_size = args.batch_size
+        self.max_length = args.max_length
+
+    def create_fd_runtime(self, args):
+        option = fd.RuntimeOption()
+        model_path = os.path.join(args.model_dir, "infer.pdmodel")
+        params_path = os.path.join(args.model_dir, "infer.pdiparams")
+        option.set_model_path(model_path, params_path)
+        if args.device == 'cpu':
+            option.use_cpu()
+        else:
+            option.use_gpu()
+        if args.backend == 'paddle':
+            option.use_paddle_backend()
+        elif args.backend == 'onnx_runtime':
+            option.use_ort_backend()
+        elif args.backend == 'openvino':
+            option.use_openvino_backend()
+        else:
+            option.use_trt_backend()
+            if args.backend == 'paddle_tensorrt':
+                option.enable_paddle_to_trt()
+                option.enable_paddle_trt_collect_shape()
+            trt_file = os.path.join(args.model_dir, "infer.trt")
+            option.set_trt_input_shape(
+                'input_ids',
+                min_shape=[1, args.max_length],
+                opt_shape=[args.batch_size, args.max_length],
+                max_shape=[args.batch_size, args.max_length])
+            option.set_trt_input_shape(
+                'token_type_ids',
+                min_shape=[1, args.max_length],
+                opt_shape=[args.batch_size, args.max_length],
+                max_shape=[args.batch_size, args.max_length])
+            if args.use_fp16:
+                option.enable_trt_fp16()
+                trt_file = trt_file + ".fp16"
+            option.set_trt_cache_file(trt_file)
+        return fd.Runtime(option)
+
+    def preprocess(self, texts, texts_pair):
+        data = self.tokenizer(
+            texts,
+            texts_pair,
+            max_length=self.max_length,
+            padding=True,
+            truncation=True)
+        input_ids_name = self.runtime.get_input_info(0).name
+        token_type_ids_name = self.runtime.get_input_info(1).name
+        input_map = {
+            input_ids_name: np.array(
+                data["input_ids"], dtype="int64"),
+            token_type_ids_name: np.array(
+                data["token_type_ids"], dtype="int64")
+        }
+        return input_map
+
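+    # The FastDeploy Runtime takes a dict keyed by the model's input names
+    # (built in preprocess above) and returns the output tensors in order;
+    # index 0 holds the classification logits consumed by postprocess.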
+    def infer(self, input_map):
+        results = self.runtime.infer(input_map)
+        return results
+
+    def postprocess(self, infer_data):
+        logits = np.array(infer_data[0])
+        max_value = np.max(logits, axis=1, keepdims=True)
+        exp_data = np.exp(logits - max_value)
+        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
+        out_dict = {
+            "label": probs.argmax(axis=-1),
+            "confidence": probs.max(axis=-1)
+        }
+        return out_dict
+
+    def predict(self, texts, texts_pair=None):
+        input_map = self.preprocess(texts, texts_pair)
+        infer_result = self.infer(input_map)
+        output = self.postprocess(infer_result)
+        return output
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    predictor = ErnieForSequenceClassificationPredictor(args)
+    texts_ds = ["花呗收款额度限制", "花呗支持高铁票支付吗"]
+    texts_pair_ds = ["收钱码,对花呗支付的金额有限制吗", "为什么友付宝不支持花呗付款"]
+    batch_texts = batchfy_text(texts_ds, args.batch_size)
+    batch_texts_pair = batchfy_text(texts_pair_ds, args.batch_size)
+
+    for bs, (texts,
+             texts_pair) in enumerate(zip(batch_texts, batch_texts_pair)):
+        outputs = predictor.predict(texts, texts_pair)
+        for i, (sentence1, sentence2) in enumerate(zip(texts, texts_pair)):
+            print(
+                f"Batch id:{bs}, example id:{i}, sentence1:{sentence1}, sentence2:{sentence2}, label:{outputs['label'][i]}, similarity:{outputs['confidence'][i]:.4f}"
+            )
diff --git a/examples/text/ernie-3.0/serving/README.md b/examples/text/ernie-3.0/serving/README.md
index 487a5eddca..fcf3b720ec 100644
--- a/examples/text/ernie-3.0/serving/README.md
+++ b/examples/text/ernie-3.0/serving/README.md
@@ -1,4 +1,4 @@
-# Ernie-3.0 服务化部署示例
+# ERNIE 3.0 服务化部署示例
 
 ## 准备模型
 
diff --git a/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md b/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
index aaca8a9ec9..b3ce2c1ae2 100644
--- a/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
+++ b/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
@@ -1 +1 @@
-本目录存放Ernie-3.0模型
+本目录存放ERNIE 3.0模型
diff --git a/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md b/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
index aaca8a9ec9..b3ce2c1ae2 100644
--- a/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
+++ b/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
@@ -1 +1 @@
-本目录存放Ernie-3.0模型
+本目录存放ERNIE 3.0模型