From 8fd61e3634c3255e790e1a7c444e35576c5937e9 Mon Sep 17 00:00:00 2001
From: Jack Zhou
Date: Tue, 8 Nov 2022 10:54:59 +0800
Subject: [PATCH] [Model] Add text classification task for ernie-3.0 (#430)

* move text_cls to ernie-3.0
* Add main page of ernie-3.0
* rename infer -> seq_cls_infer
* Fix the links
* Add ernie-3.0 python, cpp readme
* Fix some cpp readme
* Add fastdeploy::FDERROR
* Add python readme for ernie-3.0
* update README.md
* Add empty line
* update readme
* Fix readme
* remove the - from ernie 3.0
* ernie-3.0 -> ernie 3.0
* Use AutoTokenizer to tokenize
* Ernie -> ERNIE
---
 examples/text/ernie-3.0/README.md             |  39 +++
 examples/text/ernie-3.0/cpp/CMakeLists.txt    |  26 ++
 examples/text/ernie-3.0/cpp/README.md         |  70 +++++
 examples/text/ernie-3.0/cpp/gflags.cmake      |  76 +++++
 examples/text/ernie-3.0/cpp/seq_cls_infer.cc  | 269 ++++++++++++++++++
 examples/text/ernie-3.0/python/README.md      |  71 +++++
 .../text/ernie-3.0/python/requirements.txt    |   2 +
 .../text/ernie-3.0/python/seq_cls_infer.py    | 182 ++++++++++++
 examples/text/ernie-3.0/serving/README.md     |   2 +-
 .../models/ernie_seqcls_model/1/README.md     |   2 +-
 .../models/ernie_tokencls_model/1/README.md   |   2 +-
 11 files changed, 738 insertions(+), 3 deletions(-)
 create mode 100644 examples/text/ernie-3.0/README.md
 create mode 100644 examples/text/ernie-3.0/cpp/CMakeLists.txt
 create mode 100644 examples/text/ernie-3.0/cpp/README.md
 create mode 100644 examples/text/ernie-3.0/cpp/gflags.cmake
 create mode 100644 examples/text/ernie-3.0/cpp/seq_cls_infer.cc
 create mode 100644 examples/text/ernie-3.0/python/README.md
 create mode 100644 examples/text/ernie-3.0/python/requirements.txt
 create mode 100644 examples/text/ernie-3.0/python/seq_cls_infer.py

diff --git a/examples/text/ernie-3.0/README.md b/examples/text/ernie-3.0/README.md
new file mode 100644
index 0000000000..36d76ee817
--- /dev/null
+++ b/examples/text/ernie-3.0/README.md
@@ -0,0 +1,39 @@
+# ERNIE 3.0 Model Deployment
+
+## Detailed Model Description
+- [PaddleNLP ERNIE 3.0 model description](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+## Supported Models
+
+| Model | Structure | Language |
+| :---: | :--------: | :--------: |
+| `ERNIE 3.0-Base` | 12-layers, 768-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Medium` | 6-layers, 768-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Mini` | 6-layers, 384-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Micro` | 4-layers, 384-hidden, 12-heads | Chinese |
+| `ERNIE 3.0-Nano` | 4-layers, 312-hidden, 12-heads | Chinese |
+
+## Supported NLP Tasks
+
+| Task | Supported |
+| :--------------- | ------- |
+| Text classification | ✅ |
+| Sequence labeling | ❌ |
+
+## Export the Deployment Model
+
+Before deployment, the trained ERNIE model has to be exported as an inference (deployment) model; see [Export the model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0) for the export steps.
+
+## Download Fine-tuned Models
+
+### Classification task
+
+For convenient testing, an ERNIE 3.0-Medium model fine-tuned on the text classification [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) is provided below and can be downloaded directly.
+
+- [ERNIE 3.0 Medium AFQMC](https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz)
+
+## Detailed Deployment Documents
+
+- [Python deployment](python)
+- [C++ deployment](cpp)
+- [Serving deployment](serving)
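+
+For reference, the fine-tuned AFQMC model listed above can also be fetched from a script. The following is a minimal, illustrative Python sketch (it simply mirrors the `wget`/`tar` steps used in the per-language READMEs; the URL is the one given in the download section):
+
+```python
+# Illustrative helper: download and unpack the fine-tuned AFQMC model.
+import tarfile
+import urllib.request
+
+URL = "https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz"
+
+archive, _ = urllib.request.urlretrieve(URL, "ernie-3.0-medium-zh-afqmc.tgz")
+with tarfile.open(archive) as tar:
+    tar.extractall(".")  # creates the ernie-3.0-medium-zh-afqmc/ directory
+```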
diff --git a/examples/text/ernie-3.0/cpp/CMakeLists.txt b/examples/text/ernie-3.0/cpp/CMakeLists.txt
new file mode 100644
index 0000000000..fe15b14ff8
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+set(THIRD_LIBS "")
+include(gflags.cmake)
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(seq_cls_infer_demo ${PROJECT_SOURCE_DIR}/seq_cls_infer.cc)
+target_link_libraries(seq_cls_infer_demo ${FASTDEPLOY_LIBS} ${THIRD_LIBS})
diff --git a/examples/text/ernie-3.0/cpp/README.md b/examples/text/ernie-3.0/cpp/README.md
new file mode 100644
index 0000000000..5c2c854edf
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/README.md
@@ -0,0 +1,70 @@
+# ERNIE 3.0 Model C++ Deployment Example
+
+Before deployment, confirm the following two steps:
+
+- 1. The software and hardware environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md).
+- 2. Download the prebuilt deployment library and the samples code for your development environment; see [FastDeploy prebuilt libraries](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md).
+
+This directory provides `seq_cls_infer.cc`, a C++ deployment example that quickly runs the text classification task on CPU/GPU.
+
+
+## Text Classification Task
+
+### Quick start
+
+The following example shows how to use the FastDeploy library to deploy an ERNIE 3.0 Medium model for text classification inference in C++ on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) from the CLUE Benchmark.
+
+```bash
+# Download the SDK and build the examples code (the examples are shipped inside the SDK)
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-0.4.0.tgz
+tar xvf fastdeploy-linux-x64-gpu-0.4.0.tgz
+
+cd fastdeploy-linux-x64-gpu-0.4.0/examples/text/ernie-3.0/cpp
+mkdir build
+cd build
+# Run cmake; FASTDEPLOY_INSTALL_DIR must be set to the FastDeploy SDK directory.
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../../../fastdeploy-linux-x64-gpu-0.4.0
+make -j
+
+# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset, together with the vocab file
+wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
+tar xvfz ernie-3.0-medium-zh-afqmc.tgz
+
+# CPU inference
+./seq_cls_infer_demo --device cpu --model_dir ernie-3.0-medium-zh-afqmc
+
+# GPU inference
+./seq_cls_infer_demo --device gpu --model_dir ernie-3.0-medium-zh-afqmc
+
+```
+
+The results returned after the run are as follows:
+```bash
+[INFO] /paddle/FastDeploy/examples/text/ernie-3.0/cpp/seq_cls_infer.cc(93)::CreateRuntimeOption model_path = ernie-3.0-medium-zh-afqmc/infer.pdmodel, param_path = ernie-3.0-medium-zh-afqmc/infer.pdiparams
+[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
+Batch id: 0, example id: 0, sentence 1: 花呗收款额度限制, sentence 2: 收钱码,对花呗支付的金额有限制吗, label: 1, confidence: 0.581852
+Batch id: 1, example id: 0, sentence 1: 花呗支持高铁票支付吗, sentence 2: 为什么友付宝不支持花呗付款, label: 0, confidence: 0.997921
+```
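+
+The `label` and `confidence` fields above come from the model logits: the confidence is the maximum of the row-wise softmax and the label is its argmax, which is exactly what the demo's `Postprocess` does with `fastdeploy::Softmax`, `Max` and `ArgMax`. A minimal, illustrative NumPy sketch of that post-processing (not part of the demo itself):
+
+```python
+import numpy as np
+
+def logits_to_label_confidence(logits: np.ndarray):
+    """Row-wise softmax, then take the argmax and its probability."""
+    shifted = logits - logits.max(axis=-1, keepdims=True)  # numerical stability
+    probs = np.exp(shifted) / np.exp(shifted).sum(axis=-1, keepdims=True)
+    return probs.argmax(axis=-1), probs.max(axis=-1)
+
+labels, confidences = logits_to_label_confidence(np.array([[0.2, 0.5]]))
+```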
+
+
+### Parameter description
+
+Besides the command line arguments shown above, `seq_cls_infer_demo` supports more options. They are described below.
+
+| Argument | Description |
+|----------|--------------|
+|--model_dir | Directory of the deployment model |
+|--batch_size | Maximum batch size for inference; defaults to 1 |
+|--max_length | Maximum sequence length; defaults to 128 |
+|--device | Device to run on; one of ['cpu', 'gpu']; defaults to 'cpu' |
+|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']; defaults to 'onnx_runtime' |
+|--use_fp16 | Whether to run inference in FP16 mode; can be enabled with the tensorrt and paddle_tensorrt backends; defaults to False |
+
+## Related Documents
+
+[Detailed introduction to the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[How to export the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[ERNIE 3.0 model Python deployment](../python/README.md)
diff --git a/examples/text/ernie-3.0/cpp/gflags.cmake b/examples/text/ernie-3.0/cpp/gflags.cmake
new file mode 100644
index 0000000000..9fede6c5fe
--- /dev/null
+++ b/examples/text/ernie-3.0/cpp/gflags.cmake
@@ -0,0 +1,76 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+INCLUDE(ExternalProject)
+SET(GIT_URL "https://github.com")
+SET(GFLAGS_PREFIX_DIR ${CMAKE_CURRENT_BINARY_DIR}/gflags)
+SET(GFLAGS_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/gflags)
+SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." 
FORCE) +set(GFLAGS_REPOSITORY ${GIT_URL}/gflags/gflags.git) +set(GFLAGS_TAG "v2.2.2") +IF(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) +ELSE(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) + set(BUILD_COMMAND $(MAKE) --silent) + set(INSTALL_COMMAND $(MAKE) install) +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) + +ExternalProject_Add( + extern_gflags + ${EXTERNAL_PROJECT_LOG_ARGS} + ${SHALLOW_CLONE} + GIT_REPOSITORY ${GFLAGS_REPOSITORY} + GIT_TAG ${GFLAGS_TAG} + PREFIX ${GFLAGS_PREFIX_DIR} + UPDATE_COMMAND "" + BUILD_COMMAND ${BUILD_COMMAND} + INSTALL_COMMAND ${INSTALL_COMMAND} + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DBUILD_STATIC_LIBS=ON + -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES} +) + +ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) +ADD_DEPENDENCIES(gflags extern_gflags) +LIST(APPEND THIRD_LIBS gflags) +if (UNIX) + LIST(APPEND THIRD_LIBS pthread) +endif() +# On Windows (including MinGW), the Shlwapi library is used by gflags if available. +if (WIN32) + include(CheckIncludeFileCXX) + check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI) + if (HAVE_SHLWAPI) + set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib) + endif(HAVE_SHLWAPI) +endif (WIN32) diff --git a/examples/text/ernie-3.0/cpp/seq_cls_infer.cc b/examples/text/ernie-3.0/cpp/seq_cls_infer.cc new file mode 100644 index 0000000000..01ef403a0b --- /dev/null +++ b/examples/text/ernie-3.0/cpp/seq_cls_infer.cc @@ -0,0 +1,269 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/function/reduce.h"
+#include "fastdeploy/function/softmax.h"
+#include "fastdeploy/runtime.h"
+#include "fastdeploy/utils/path.h"
+#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
+#include "gflags/gflags.h"
+
+using namespace paddlenlp;
+using namespace faster_tokenizer::tokenizers_impl;
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+DEFINE_string(model_dir, "", "Directory of the inference model.");
+DEFINE_string(vocab_path, "", "Path of the vocab file.");
+DEFINE_string(device, "cpu",
+              "Type of inference device, support 'cpu' or 'gpu'.");
+DEFINE_string(backend, "onnx_runtime",
+              "The inference runtime backend, support: ['onnx_runtime', "
+              "'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']");
+DEFINE_int32(batch_size, 1, "The batch size of data.");
+DEFINE_int32(max_length, 128, "The max length of sequence.");
+DEFINE_bool(use_fp16, false, "Whether to use FP16 mode.");
+
+void PrintUsage() {
+  fastdeploy::FDINFO
+      << "Usage: seq_cls_infer_demo --model_dir dir --device [cpu|gpu] "
+         "--backend "
+         "[onnx_runtime|paddle|openvino|tensorrt|paddle_tensorrt] "
+         "--batch_size size --max_length len --use_fp16 false"
+      << std::endl;
+  fastdeploy::FDINFO << "Default value of device: cpu" << std::endl;
+  fastdeploy::FDINFO << "Default value of backend: onnx_runtime" << std::endl;
+  fastdeploy::FDINFO << "Default value of batch_size: 1" << std::endl;
+  fastdeploy::FDINFO << "Default value of max_length: 128" << std::endl;
+  fastdeploy::FDINFO << "Default value of use_fp16: false" << std::endl;
+}
+
+bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
+  if (FLAGS_device == "gpu") {
+    option->UseGpu();
+  } else if (FLAGS_device == "cpu") {
+    option->UseCpu();
+  } else {
+    fastdeploy::FDERROR << "The available device should be one of the list "
+                           "['cpu', 'gpu']. But receive '"
+                        << FLAGS_device << "'" << std::endl;
+    return false;
+  }
+
+  if (FLAGS_backend == "onnx_runtime") {
+    option->UseOrtBackend();
+  } else if (FLAGS_backend == "paddle") {
+    option->UsePaddleBackend();
+  } else if (FLAGS_backend == "openvino") {
+    option->UseOpenVINOBackend();
+  } else if (FLAGS_backend == "tensorrt" ||
+             FLAGS_backend == "paddle_tensorrt") {
+    option->UseTrtBackend();
+    if (FLAGS_backend == "paddle_tensorrt") {
+      option->EnablePaddleToTrt();
+      option->EnablePaddleTrtCollectShape();
+    }
+    std::string trt_file = FLAGS_model_dir + sep + "infer.trt";
+    option->SetTrtInputShape("input_ids", {1, FLAGS_max_length},
+                             {FLAGS_batch_size, FLAGS_max_length},
+                             {FLAGS_batch_size, FLAGS_max_length});
+    option->SetTrtInputShape("token_type_ids", {1, FLAGS_max_length},
+                             {FLAGS_batch_size, FLAGS_max_length},
+                             {FLAGS_batch_size, FLAGS_max_length});
+    if (FLAGS_use_fp16) {
+      option->EnableTrtFP16();
+      trt_file = trt_file + ".fp16";
+    }
+  } else {
+    fastdeploy::FDERROR << "The available backend should be one of the list "
+                           "['onnx_runtime', 'paddle', 'openvino', "
+                           "'tensorrt', 'paddle_tensorrt']. But receive '"
+                        << FLAGS_backend << "'" << std::endl;
+    return false;
+  }
+  std::string model_path = FLAGS_model_dir + sep + "infer.pdmodel";
+  std::string param_path = FLAGS_model_dir + sep + "infer.pdiparams";
+  fastdeploy::FDINFO << "model_path = " << model_path
+                     << ", param_path = " << param_path << std::endl;
+  option->SetModelPath(model_path, param_path);
+  return true;
+}
+
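+// BatchFyTexts splits the flat list of input texts into consecutive batches of
+// at most `batch_size` sentences; the last batch may be smaller. Each batch is
+// later tokenized and sent to the runtime in a single call.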
+bool BatchFyTexts(const std::vector<std::string>& texts, int batch_size,
+                  std::vector<std::vector<std::string>>* batch_texts) {
+  for (int idx = 0; idx < texts.size(); idx += batch_size) {
+    int rest = texts.size() - idx;
+    int curr_size = std::min(batch_size, rest);
+    std::vector<std::string> batch_text(curr_size);
+    std::copy_n(texts.begin() + idx, curr_size, batch_text.begin());
+    batch_texts->emplace_back(std::move(batch_text));
+  }
+  return true;
+}
+
+struct SeqClsResult {
+  int label;
+  float confidence;
+};
+
+struct ErnieForSequenceClassificationPredictor {
+  fastdeploy::Runtime runtime_;
+  ErnieFasterTokenizer tokenizer_;
+  ErnieForSequenceClassificationPredictor(
+      const fastdeploy::RuntimeOption& option,
+      const ErnieFasterTokenizer& tokenizer)
+      : tokenizer_(tokenizer) {
+    runtime_.Init(option);
+  }
+
+  bool Preprocess(const std::vector<std::string>& texts,
+                  const std::vector<std::string>& texts_pair,
+                  std::vector<fastdeploy::FDTensor>* inputs) {
+    std::vector<faster_tokenizer::core::Encoding> encodings;
+    std::vector<faster_tokenizer::core::EncodeInput> text_pair_input;
+    // 1. Tokenize the text or (text, text_pair)
+    if (texts_pair.empty()) {
+      for (int i = 0; i < texts.size(); ++i) {
+        text_pair_input.emplace_back(texts[i]);
+      }
+    } else {
+      if (texts.size() != texts_pair.size()) {
+        return false;
+      }
+      for (int i = 0; i < texts.size(); ++i) {
+        text_pair_input.emplace_back(
+            std::pair<std::string, std::string>(texts[i], texts_pair[i]));
+      }
+    }
+    tokenizer_.EncodeBatchStrings(text_pair_input, &encodings);
+    // 2. Construct the input vector tensor
+    // 2.1 Allocate input tensor
+    int64_t batch_size = texts.size();
+    int64_t seq_len = 0;
+    if (batch_size > 0) {
+      seq_len = encodings[0].GetIds().size();
+    }
+    inputs->resize(runtime_.NumInputs());
+    for (int i = 0; i < runtime_.NumInputs(); ++i) {
+      (*inputs)[i].Allocate({batch_size, seq_len},
+                            fastdeploy::FDDataType::INT64,
+                            runtime_.GetInputInfo(i).name);
+    }
+    // 2.2 Set the value of data
+    size_t start = 0;
+    int64_t* input_ids_ptr =
+        reinterpret_cast<int64_t*>((*inputs)[0].MutableData());
+    int64_t* type_ids_ptr =
+        reinterpret_cast<int64_t*>((*inputs)[1].MutableData());
+    for (int i = 0; i < encodings.size(); ++i) {
+      auto&& curr_input_ids = encodings[i].GetIds();
+      auto&& curr_type_ids = encodings[i].GetTypeIds();
+      std::copy(curr_input_ids.begin(), curr_input_ids.end(),
+                input_ids_ptr + start);
+      std::copy(curr_type_ids.begin(), curr_type_ids.end(),
+                type_ids_ptr + start);
+      start += seq_len;
+    }
+    return true;
+  }
+
+  bool Postprocess(const std::vector<fastdeploy::FDTensor>& outputs,
+                   std::vector<SeqClsResult>* seq_cls_results) {
+    const auto& logits = outputs[0];
+    fastdeploy::FDTensor probs;
+    fastdeploy::Softmax(logits, &probs);
+
+    fastdeploy::FDTensor labels, confidences;
+    fastdeploy::Max(probs, &confidences, {-1});
+    fastdeploy::ArgMax(probs, &labels, -1);
+    if (labels.Numel() != confidences.Numel()) {
+      return false;
+    }
+
+    seq_cls_results->resize(labels.Numel());
+    int64_t* label_ptr = reinterpret_cast<int64_t*>(labels.Data());
+    float* confidence_ptr = reinterpret_cast<float*>(confidences.Data());
+    for (int i = 0; i < labels.Numel(); ++i) {
+      (*seq_cls_results)[i].label = label_ptr[i];
+      (*seq_cls_results)[i].confidence = confidence_ptr[i];
+    }
+    return true;
+  }
+
+  bool Predict(const std::vector<std::string>& texts,
+               const std::vector<std::string>& texts_pair,
+               std::vector<SeqClsResult>* seq_cls_results) {
+    std::vector<fastdeploy::FDTensor> inputs;
+    if (!Preprocess(texts, texts_pair, &inputs)) {
+      return false;
+    }
+
+    std::vector<fastdeploy::FDTensor> outputs(runtime_.NumOutputs());
+    runtime_.Infer(inputs, &outputs);
+
+    if (!Postprocess(outputs, seq_cls_results)) {
+      return false;
+    }
+    return true;
+  }
+};
+
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto option = fastdeploy::RuntimeOption();
+  if (!CreateRuntimeOption(&option)) {
+    PrintUsage();
+    return -1;
+  }
+
+  std::string vocab_path = FLAGS_vocab_path;
+  if (!fastdeploy::CheckFileExists(vocab_path)) {
+    vocab_path = fastdeploy::PathJoin(FLAGS_model_dir, "vocab.txt");
+    if (!fastdeploy::CheckFileExists(vocab_path)) {
+      fastdeploy::FDERROR << "The path of vocab " << vocab_path
+                          << " doesn't exist" << std::endl;
+      PrintUsage();
+      return -1;
+    }
+  }
+  ErnieFasterTokenizer tokenizer(vocab_path);
+
+  ErnieForSequenceClassificationPredictor predictor(option, tokenizer);
+
+  std::vector<SeqClsResult> seq_cls_results;
+  std::vector<std::string> texts_ds = {"花呗收款额度限制",
+                                       "花呗支持高铁票支付吗"};
+  std::vector<std::string> texts_pair_ds = {"收钱码,对花呗支付的金额有限制吗",
+                                            "为什么友付宝不支持花呗付款"};
+  std::vector<std::vector<std::string>> batch_texts, batch_texts_pair;
+  BatchFyTexts(texts_ds, FLAGS_batch_size, &batch_texts);
+  BatchFyTexts(texts_pair_ds, FLAGS_batch_size, &batch_texts_pair);
+  for (int bs = 0; bs < batch_texts.size(); ++bs) {
+    predictor.Predict(batch_texts[bs], batch_texts_pair[bs], &seq_cls_results);
+    for (int i = 0; i < batch_texts[bs].size(); ++i) {
+      std::cout << "Batch id: " << bs << ", example id: " << i
+                << ", sentence 1: " << batch_texts[bs][i]
+                << ", sentence 2: " << batch_texts_pair[bs][i]
+                << ", label: " << seq_cls_results[i].label
+                << ", confidence: " << seq_cls_results[i].confidence
+                << std::endl;
+    }
+  }
+  return 0;
+}
diff --git a/examples/text/ernie-3.0/python/README.md b/examples/text/ernie-3.0/python/README.md
new file mode 100644
index 0000000000..12487aa7ad
--- /dev/null
+++ b/examples/text/ernie-3.0/python/README.md
@@ -0,0 +1,71 @@
+# ERNIE 3.0 Model Python Deployment Example
+
+Before deployment, confirm the following two steps:
+
+- 1. The software and hardware environment meets the requirements; see [FastDeploy environment requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md).
+- 2. Install the FastDeploy Python wheel; see [FastDeploy Python installation](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md).
+
+This directory provides `seq_cls_infer.py`, a deployment example that quickly runs the text classification task on CPU/GPU.
+
+## Install Dependencies
+
+The Python Predictor provided by this project tokenizes input with the AutoTokenizer from PaddleNLP and uses fast_tokenizer to speed up tokenization. Run the following command to install the dependencies.
+
+```bash
+pip install -r requirements.txt
+```
+
+
+## Text Classification Task
+
+### Quick start
+
+The following example shows how to use the FastDeploy library to deploy an ERNIE 3.0 Medium model for text classification inference in Python on the [AFQMC dataset](https://bj.bcebos.com/paddlenlp/datasets/afqmc_public.zip) from the CLUE Benchmark.
+
+```bash
+
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/text/ernie-3.0/python
+
+# Download the ERNIE 3.0 model fine-tuned on the AFQMC dataset
+wget https://bj.bcebos.com/fastdeploy/models/ernie-3.0/ernie-3.0-medium-zh-afqmc.tgz
+tar xvfz ernie-3.0-medium-zh-afqmc.tgz
+
+# CPU inference
+python seq_cls_infer.py --device cpu --model_dir ernie-3.0-medium-zh-afqmc
+
+# GPU inference
+python seq_cls_infer.py --device gpu --model_dir ernie-3.0-medium-zh-afqmc
+
+```
+
+The results returned after the run are as follows:
+
+```bash
+[INFO] fastdeploy/runtime.cc(469)::Init Runtime initialized with Backend::ORT in Device::CPU.
+Batch id:0, example id:0, sentence1:花呗收款额度限制, sentence2:收钱码,对花呗支付的金额有限制吗, label:1, similarity:0.5819
+Batch id:1, example id:0, sentence1:花呗支持高铁票支付吗, sentence2:为什么友付宝不支持花呗付款, label:0, similarity:0.9979
+```
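+
+The predictor class in `seq_cls_infer.py` can also be driven from your own script instead of the command line. A minimal, illustrative sketch (it assumes the fine-tuned model has been extracted to `ernie-3.0-medium-zh-afqmc` as above, and it reuses the example's own `parse_arguments` by setting `sys.argv`; only `--model_dir` is required):
+
+```python
+import sys
+
+from seq_cls_infer import ErnieForSequenceClassificationPredictor, parse_arguments
+
+# Reuse the example's argument parser with default settings.
+sys.argv = ["seq_cls_infer.py", "--model_dir", "ernie-3.0-medium-zh-afqmc"]
+args = parse_arguments()
+
+predictor = ErnieForSequenceClassificationPredictor(args)
+outputs = predictor.predict(["花呗收款额度限制"], ["收钱码,对花呗支付的金额有限制吗"])
+print(outputs["label"], outputs["confidence"])
+```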
+
+### Parameter description
+
+Besides the command line arguments shown above, `seq_cls_infer.py` supports more options. They are described below.
+
+| Argument | Description |
+|----------|--------------|
+|--model_dir | Directory of the deployment model |
+|--batch_size | Maximum batch size for inference; defaults to 1 |
+|--max_length | Maximum sequence length; defaults to 128 |
+|--device | Device to run on; one of ['cpu', 'gpu']; defaults to 'cpu' |
+|--backend | Inference backend; one of ['onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']; defaults to 'onnx_runtime' |
+|--use_fp16 | Whether to run inference in FP16 mode; can be enabled with the tensorrt and paddle_tensorrt backends; defaults to False |
+|--use_fast | Whether to use FastTokenizer to speed up the tokenization stage; defaults to False |
+
+## Related Documents
+
+[Detailed introduction to the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[How to export the ERNIE 3.0 model](https://github.com/PaddlePaddle/PaddleNLP/tree/release/2.4/model_zoo/ernie-3.0)
+
+[ERNIE 3.0 model C++ deployment](../cpp/README.md)
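+
+The backend-related flags above map onto `fastdeploy.RuntimeOption` calls. The sketch below illustrates roughly what `--device gpu --backend paddle_tensorrt --use_fp16 True` configures; it only uses calls that `seq_cls_infer.py` itself makes (shapes and the model directory name are example values):
+
+```python
+import os
+import fastdeploy as fd
+
+model_dir = "ernie-3.0-medium-zh-afqmc"
+option = fd.RuntimeOption()
+option.set_model_path(
+    os.path.join(model_dir, "infer.pdmodel"),
+    os.path.join(model_dir, "infer.pdiparams"))
+option.use_gpu()
+option.use_trt_backend()
+option.enable_paddle_to_trt()            # run TensorRT through the Paddle backend
+option.enable_paddle_trt_collect_shape()
+option.set_trt_input_shape("input_ids", min_shape=[1, 128],
+                           opt_shape=[1, 128], max_shape=[1, 128])
+option.set_trt_input_shape("token_type_ids", min_shape=[1, 128],
+                           opt_shape=[1, 128], max_shape=[1, 128])
+option.enable_trt_fp16()
+runtime = fd.Runtime(option)
+```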
diff --git a/examples/text/ernie-3.0/python/requirements.txt b/examples/text/ernie-3.0/python/requirements.txt
new file mode 100644
index 0000000000..204cf718cd
--- /dev/null
+++ b/examples/text/ernie-3.0/python/requirements.txt
@@ -0,0 +1,2 @@
+faster_tokenizer
+paddlenlp
diff --git a/examples/text/ernie-3.0/python/seq_cls_infer.py b/examples/text/ernie-3.0/python/seq_cls_infer.py
new file mode 100644
index 0000000000..de67884a1d
--- /dev/null
+++ b/examples/text/ernie-3.0/python/seq_cls_infer.py
@@ -0,0 +1,182 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import distutils.util
+
+import numpy as np
+import faster_tokenizer
+from paddlenlp.transformers import AutoTokenizer
+import fastdeploy as fd
+
+
+def parse_arguments():
+    import argparse
+    import ast
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_dir", required=True, help="The directory of model.")
+    parser.add_argument(
+        "--vocab_path",
+        type=str,
+        default="",
+        help="The path of tokenizer vocab.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        choices=['gpu', 'cpu'],
+        help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--backend",
+        type=str,
+        default='onnx_runtime',
+        choices=[
+            'onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'
+        ],
+        help="The inference runtime backend.")
+    parser.add_argument(
+        "--batch_size", type=int, default=1, help="The batch size of data.")
+    parser.add_argument(
+        "--max_length",
+        type=int,
+        default=128,
+        help="The max length of sequence.")
+    parser.add_argument(
+        "--log_interval",
+        type=int,
+        default=10,
+        help="The interval of logging.")
+    parser.add_argument(
+        "--use_fp16",
+        type=distutils.util.strtobool,
+        default=False,
+        help="Whether to use FP16 mode")
+    parser.add_argument(
+        "--use_fast",
+        type=distutils.util.strtobool,
+        default=False,
+        help="Whether to use fast_tokenizer to accelerate the tokenization.")
+    return parser.parse_args()
+
+
+def batchfy_text(texts, batch_size):
+    batch_texts = []
+    batch_start = 0
+    while batch_start < len(texts):
+        batch_texts += [
+            texts[batch_start:min(batch_start + batch_size, len(texts))]
+        ]
+        batch_start += batch_size
+    return batch_texts
+
+
+class ErnieForSequenceClassificationPredictor(object):
+    def __init__(self, args):
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            'ernie-3.0-medium-zh', use_faster=args.use_fast)
+        self.runtime = self.create_fd_runtime(args)
+        self.batch_size = args.batch_size
+        self.max_length = args.max_length
+
+    def create_fd_runtime(self, args):
+        option = fd.RuntimeOption()
+        model_path = os.path.join(args.model_dir, "infer.pdmodel")
+        params_path = os.path.join(args.model_dir, "infer.pdiparams")
+        option.set_model_path(model_path, params_path)
+        if args.device == 'cpu':
+            option.use_cpu()
+        else:
+            option.use_gpu()
+        if args.backend == 'paddle':
+            option.use_paddle_backend()
+        elif args.backend == 'onnx_runtime':
+            option.use_ort_backend()
+        elif args.backend == 'openvino':
+            option.use_openvino_backend()
+        else:
+            option.use_trt_backend()
+            if args.backend == 'paddle_tensorrt':
+                option.enable_paddle_to_trt()
+                option.enable_paddle_trt_collect_shape()
+            trt_file = os.path.join(args.model_dir, "infer.trt")
+            option.set_trt_input_shape(
+                'input_ids',
+                min_shape=[1, args.max_length],
+                opt_shape=[args.batch_size, args.max_length],
+                max_shape=[args.batch_size, args.max_length])
+            option.set_trt_input_shape(
+                'token_type_ids',
+                min_shape=[1, args.max_length],
+                opt_shape=[args.batch_size, args.max_length],
+                max_shape=[args.batch_size, args.max_length])
+            if args.use_fp16:
+                option.enable_trt_fp16()
+                trt_file = trt_file + ".fp16"
+            option.set_trt_cache_file(trt_file)
+        return fd.Runtime(option)
+
+    def preprocess(self, texts, texts_pair):
+        data = self.tokenizer(
+            texts,
+            texts_pair,
+            max_length=self.max_length,
+            padding=True,
+            truncation=True)
+        input_ids_name = self.runtime.get_input_info(0).name
+        token_type_ids_name = self.runtime.get_input_info(1).name
+        input_map = {
+            input_ids_name: np.array(
+                data["input_ids"], dtype="int64"),
+            token_type_ids_name: np.array(
+                data["token_type_ids"], dtype="int64")
+        }
+        return input_map
+
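+    # The FastDeploy Runtime takes a dict keyed by the model's input names
+    # (built in preprocess above) and returns the output tensors in order;
+    # index 0 holds the classification logits consumed by postprocess.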
+    def infer(self, input_map):
+        results = self.runtime.infer(input_map)
+        return results
+
+    def postprocess(self, infer_data):
+        logits = np.array(infer_data[0])
+        max_value = np.max(logits, axis=1, keepdims=True)
+        exp_data = np.exp(logits - max_value)
+        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
+        out_dict = {
+            "label": probs.argmax(axis=-1),
+            "confidence": probs.max(axis=-1)
+        }
+        return out_dict
+
+    def predict(self, texts, texts_pair=None):
+        input_map = self.preprocess(texts, texts_pair)
+        infer_result = self.infer(input_map)
+        output = self.postprocess(infer_result)
+        return output
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    predictor = ErnieForSequenceClassificationPredictor(args)
+    texts_ds = ["花呗收款额度限制", "花呗支持高铁票支付吗"]
+    texts_pair_ds = ["收钱码,对花呗支付的金额有限制吗", "为什么友付宝不支持花呗付款"]
+    batch_texts = batchfy_text(texts_ds, args.batch_size)
+    batch_texts_pair = batchfy_text(texts_pair_ds, args.batch_size)
+
+    for bs, (texts,
+             texts_pair) in enumerate(zip(batch_texts, batch_texts_pair)):
+        outputs = predictor.predict(texts, texts_pair)
+        for i, (sentence1, sentence2) in enumerate(zip(texts, texts_pair)):
+            print(
+                f"Batch id:{bs}, example id:{i}, sentence1:{sentence1}, sentence2:{sentence2}, label:{outputs['label'][i]}, similarity:{outputs['confidence'][i]:.4f}"
+            )
diff --git a/examples/text/ernie-3.0/serving/README.md b/examples/text/ernie-3.0/serving/README.md
index 487a5eddca..fcf3b720ec 100644
--- a/examples/text/ernie-3.0/serving/README.md
+++ b/examples/text/ernie-3.0/serving/README.md
@@ -1,4 +1,4 @@
-# Ernie-3.0 服务化部署示例
+# ERNIE 3.0 服务化部署示例
 
 ## 准备模型
 
diff --git a/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md b/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
index aaca8a9ec9..b3ce2c1ae2 100644
--- a/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
+++ b/examples/text/ernie-3.0/serving/models/ernie_seqcls_model/1/README.md
@@ -1 +1 @@
-本目录存放Ernie-3.0模型
+本目录存放ERNIE 3.0模型
diff --git a/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md b/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
index aaca8a9ec9..b3ce2c1ae2 100644
--- a/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
+++ b/examples/text/ernie-3.0/serving/models/ernie_tokencls_model/1/README.md
@@ -1 +1 @@
-本目录存放Ernie-3.0模型
+本目录存放ERNIE 3.0模型